LAPACK  3.4.1
LAPACK: Linear Algebra PACKage
sgesvd.f
Go to the documentation of this file.
00001 *> \brief <b> SGESVD computes the singular value decomposition (SVD) for GE matrices</b>
00002 *
00003 *  =========== DOCUMENTATION ===========
00004 *
00005 * Online html documentation available at 
00006 *            http://www.netlib.org/lapack/explore-html/ 
00007 *
00008 *> \htmlonly
00009 *> Download SGESVD + dependencies 
00010 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvd.f"> 
00011 *> [TGZ]</a> 
00012 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvd.f"> 
00013 *> [ZIP]</a> 
00014 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvd.f"> 
00015 *> [TXT]</a>
00016 *> \endhtmlonly 
00017 *
00018 *  Definition:
00019 *  ===========
00020 *
00021 *       SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
00022 *                          WORK, LWORK, INFO )
00023 * 
00024 *       .. Scalar Arguments ..
00025 *       CHARACTER          JOBU, JOBVT
00026 *       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
00027 *       ..
00028 *       .. Array Arguments ..
00029 *       REAL               A( LDA, * ), S( * ), U( LDU, * ),
00030 *      $                   VT( LDVT, * ), WORK( * )
00031 *       ..
00032 *  
00033 *
00034 *> \par Purpose:
00035 *  =============
00036 *>
00037 *> \verbatim
00038 *>
00039 *> SGESVD computes the singular value decomposition (SVD) of a real
00040 *> M-by-N matrix A, optionally computing the left and/or right singular
00041 *> vectors. The SVD is written
00042 *>
00043 *>      A = U * SIGMA * transpose(V)
00044 *>
00045 *> where SIGMA is an M-by-N matrix which is zero except for its
00046 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
00047 *> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
00048 *> are the singular values of A; they are real and non-negative, and
00049 *> are returned in descending order.  The first min(m,n) columns of
00050 *> U and V are the left and right singular vectors of A.
00051 *>
00052 *> Note that the routine returns V**T, not V.
00053 *> \endverbatim
00054 *
00055 *  Arguments:
00056 *  ==========
00057 *
00058 *> \param[in] JOBU
00059 *> \verbatim
00060 *>          JOBU is CHARACTER*1
00061 *>          Specifies options for computing all or part of the matrix U:
00062 *>          = 'A':  all M columns of U are returned in array U:
00063 *>          = 'S':  the first min(m,n) columns of U (the left singular
00064 *>                  vectors) are returned in the array U;
00065 *>          = 'O':  the first min(m,n) columns of U (the left singular
00066 *>                  vectors) are overwritten on the array A;
00067 *>          = 'N':  no columns of U (no left singular vectors) are
00068 *>                  computed.
00069 *> \endverbatim
00070 *>
00071 *> \param[in] JOBVT
00072 *> \verbatim
00073 *>          JOBVT is CHARACTER*1
00074 *>          Specifies options for computing all or part of the matrix
00075 *>          V**T:
00076 *>          = 'A':  all N rows of V**T are returned in the array VT;
00077 *>          = 'S':  the first min(m,n) rows of V**T (the right singular
00078 *>                  vectors) are returned in the array VT;
00079 *>          = 'O':  the first min(m,n) rows of V**T (the right singular
00080 *>                  vectors) are overwritten on the array A;
00081 *>          = 'N':  no rows of V**T (no right singular vectors) are
00082 *>                  computed.
00083 *>
00084 *>          JOBVT and JOBU cannot both be 'O'.
00085 *> \endverbatim
00086 *>
00087 *> \param[in] M
00088 *> \verbatim
00089 *>          M is INTEGER
00090 *>          The number of rows of the input matrix A.  M >= 0.
00091 *> \endverbatim
00092 *>
00093 *> \param[in] N
00094 *> \verbatim
00095 *>          N is INTEGER
00096 *>          The number of columns of the input matrix A.  N >= 0.
00097 *> \endverbatim
00098 *>
00099 *> \param[in,out] A
00100 *> \verbatim
00101 *>          A is REAL array, dimension (LDA,N)
00102 *>          On entry, the M-by-N matrix A.
00103 *>          On exit,
00104 *>          if JOBU = 'O',  A is overwritten with the first min(m,n)
00105 *>                          columns of U (the left singular vectors,
00106 *>                          stored columnwise);
00107 *>          if JOBVT = 'O', A is overwritten with the first min(m,n)
00108 *>                          rows of V**T (the right singular vectors,
00109 *>                          stored rowwise);
00110 *>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
00111 *>                          are destroyed.
00112 *> \endverbatim
00113 *>
00114 *> \param[in] LDA
00115 *> \verbatim
00116 *>          LDA is INTEGER
00117 *>          The leading dimension of the array A.  LDA >= max(1,M).
00118 *> \endverbatim
00119 *>
00120 *> \param[out] S
00121 *> \verbatim
00122 *>          S is REAL array, dimension (min(M,N))
00123 *>          The singular values of A, sorted so that S(i) >= S(i+1).
00124 *> \endverbatim
00125 *>
00126 *> \param[out] U
00127 *> \verbatim
00128 *>          U is REAL array, dimension (LDU,UCOL)
00129 *>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
00130 *>          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
00131 *>          if JOBU = 'S', U contains the first min(m,n) columns of U
00132 *>          (the left singular vectors, stored columnwise);
00133 *>          if JOBU = 'N' or 'O', U is not referenced.
00134 *> \endverbatim
00135 *>
00136 *> \param[in] LDU
00137 *> \verbatim
00138 *>          LDU is INTEGER
00139 *>          The leading dimension of the array U.  LDU >= 1; if
00140 *>          JOBU = 'S' or 'A', LDU >= M.
00141 *> \endverbatim
00142 *>
00143 *> \param[out] VT
00144 *> \verbatim
00145 *>          VT is REAL array, dimension (LDVT,N)
00146 *>          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
00147 *>          V**T;
00148 *>          if JOBVT = 'S', VT contains the first min(m,n) rows of
00149 *>          V**T (the right singular vectors, stored rowwise);
00150 *>          if JOBVT = 'N' or 'O', VT is not referenced.
00151 *> \endverbatim
00152 *>
00153 *> \param[in] LDVT
00154 *> \verbatim
00155 *>          LDVT is INTEGER
00156 *>          The leading dimension of the array VT.  LDVT >= 1; if
00157 *>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
00158 *> \endverbatim
00159 *>
00160 *> \param[out] WORK
00161 *> \verbatim
00162 *>          WORK is REAL array, dimension (MAX(1,LWORK))
00163 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
00164 *>          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
00165 *>          superdiagonal elements of an upper bidiagonal matrix B
00166 *>          whose diagonal is in S (not necessarily sorted). B
00167 *>          satisfies A = U * B * VT, so it has the same singular values
00168 *>          as A, and singular vectors related by U and VT.
00169 *> \endverbatim
00170 *>
00171 *> \param[in] LWORK
00172 *> \verbatim
00173 *>          LWORK is INTEGER
00174 *>          The dimension of the array WORK.
00175 *>          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
00176 *>             - PATH 1  (M much larger than N, JOBU='N') 
00177 *>             - PATH 1t (N much larger than M, JOBVT='N')
00178 *>          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
00179 *>          For good performance, LWORK should generally be larger.
00180 *>
00181 *>          If LWORK = -1, then a workspace query is assumed; the routine
00182 *>          only calculates the optimal size of the WORK array, returns
00183 *>          this value as the first entry of the WORK array, and no error
00184 *>          message related to LWORK is issued by XERBLA.
00185 *> \endverbatim
00186 *>
00187 *> \param[out] INFO
00188 *> \verbatim
00189 *>          INFO is INTEGER
00190 *>          = 0:  successful exit.
00191 *>          < 0:  if INFO = -i, the i-th argument had an illegal value.
00192 *>          > 0:  if SBDSQR did not converge, INFO specifies how many
00193 *>                superdiagonals of an intermediate bidiagonal form B
00194 *>                did not converge to zero. See the description of WORK
00195 *>                above for details.
00196 *> \endverbatim
00197 *
00198 *  Authors:
00199 *  ========
00200 *
00201 *> \author Univ. of Tennessee 
00202 *> \author Univ. of California Berkeley 
00203 *> \author Univ. of Colorado Denver 
00204 *> \author NAG Ltd. 
00205 *
00206 *> \date April 2012
00207 *
00208 *> \ingroup realGEsing
00209 *
00210 *  =====================================================================
00211       SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
00212      $                   WORK, LWORK, INFO )
00213 *
00214 *  -- LAPACK driver routine (version 3.4.1) --
00215 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
00216 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
00217 *     April 2012
00218 *
00219 *     .. Scalar Arguments ..
00220       CHARACTER          JOBU, JOBVT
00221       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
00222 *     ..
00223 *     .. Array Arguments ..
00224       REAL               A( LDA, * ), S( * ), U( LDU, * ),
00225      $                   VT( LDVT, * ), WORK( * )
00226 *     ..
00227 *
00228 *  =====================================================================
00229 *
00230 *     .. Parameters ..
00231       REAL               ZERO, ONE
00232       PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
00233 *     ..
00234 *     .. Local Scalars ..
00235       LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
00236      $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
00237       INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
00238      $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
00239      $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
00240      $                   NRVT, WRKBL
00241       INTEGER            LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M,
00242      $                   LWORK_SGEBRD, LWORK_SORGBR_P, LWORK_SORGBR_Q,
00243      $                   LWORK_SGELQF, LWORK_SORGLQ_N, LWORK_SORGLQ_M
00244       REAL               ANRM, BIGNUM, EPS, SMLNUM
00245 *     ..
00246 *     .. Local Arrays ..
00247       REAL               DUM( 1 )
00248 *     ..
00249 *     .. External Subroutines ..
00250       EXTERNAL           SBDSQR, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY,
00251      $                   SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR,
00252      $                   XERBLA
00253 *     ..
00254 *     .. External Functions ..
00255       LOGICAL            LSAME
00256       INTEGER            ILAENV
00257       REAL               SLAMCH, SLANGE
00258       EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
00259 *     ..
00260 *     .. Intrinsic Functions ..
00261       INTRINSIC          MAX, MIN, SQRT
00262 *     ..
00263 *     .. Executable Statements ..
00264 *
00265 *     Test the input arguments
00266 *
00267       INFO = 0
00268       MINMN = MIN( M, N )
00269       WNTUA = LSAME( JOBU, 'A' )
00270       WNTUS = LSAME( JOBU, 'S' )
00271       WNTUAS = WNTUA .OR. WNTUS
00272       WNTUO = LSAME( JOBU, 'O' )
00273       WNTUN = LSAME( JOBU, 'N' )
00274       WNTVA = LSAME( JOBVT, 'A' )
00275       WNTVS = LSAME( JOBVT, 'S' )
00276       WNTVAS = WNTVA .OR. WNTVS
00277       WNTVO = LSAME( JOBVT, 'O' )
00278       WNTVN = LSAME( JOBVT, 'N' )
00279       LQUERY = ( LWORK.EQ.-1 )
00280 *
00281       IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
00282          INFO = -1
00283       ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
00284      $         ( WNTVO .AND. WNTUO ) ) THEN
00285          INFO = -2
00286       ELSE IF( M.LT.0 ) THEN
00287          INFO = -3
00288       ELSE IF( N.LT.0 ) THEN
00289          INFO = -4
00290       ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
00291          INFO = -6
00292       ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
00293          INFO = -9
00294       ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
00295      $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
00296          INFO = -11
00297       END IF
00298 *
00299 *     Compute workspace
00300 *      (Note: Comments in the code beginning "Workspace:" describe the
00301 *       minimal amount of workspace needed at that point in the code,
00302 *       as well as the preferred amount for good performance.
00303 *       NB refers to the optimal block size for the immediately
00304 *       following subroutine, as returned by ILAENV.)
00305 *
00306       IF( INFO.EQ.0 ) THEN
00307          MINWRK = 1
00308          MAXWRK = 1
00309          IF( M.GE.N .AND. MINMN.GT.0 ) THEN
00310 *
00311 *           Compute space needed for SBDSQR
00312 *
00313             MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
00314             BDSPAC = 5*N
00315 *           Compute space needed for SGEQRF
00316             CALL SGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
00317             LWORK_SGEQRF=DUM(1)
00318 *           Compute space needed for SORGQR
00319             CALL SORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
00320             LWORK_SORGQR_N=DUM(1)
00321             CALL SORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
00322             LWORK_SORGQR_M=DUM(1)
00323 *           Compute space needed for SGEBRD
00324             CALL SGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
00325      $                   DUM(1), DUM(1), -1, IERR )
00326             LWORK_SGEBRD=DUM(1)
00327 *           Compute space needed for SORGBR P
00328             CALL SORGBR( 'P', N, N, N, A, LDA, DUM(1),
00329      $                   DUM(1), -1, IERR )
00330             LWORK_SORGBR_P=DUM(1)
00331 *           Compute space needed for SORGBR Q
00332             CALL SORGBR( 'Q', N, N, N, A, LDA, DUM(1),
00333      $                   DUM(1), -1, IERR )
00334             LWORK_SORGBR_Q=DUM(1)
00335 *
00336             IF( M.GE.MNTHR ) THEN
00337                IF( WNTUN ) THEN
00338 *
00339 *                 Path 1 (M much larger than N, JOBU='N')
00340 *
00341                   MAXWRK = N + LWORK_SGEQRF
00342                   MAXWRK = MAX( MAXWRK, 3*N+LWORK_SGEBRD )
00343                   IF( WNTVO .OR. WNTVAS )
00344      $               MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_P )
00345                   MAXWRK = MAX( MAXWRK, BDSPAC )
00346                   MINWRK = MAX( 4*N, BDSPAC )
00347                ELSE IF( WNTUO .AND. WNTVN ) THEN
00348 *
00349 *                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
00350 *
00351                   WRKBL = N + LWORK_SGEQRF
00352                   WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
00353                   WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
00354                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
00355                   WRKBL = MAX( WRKBL, BDSPAC )
00356                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
00357                   MINWRK = MAX( 3*N+M, BDSPAC )
00358                ELSE IF( WNTUO .AND. WNTVAS ) THEN
00359 *
00360 *                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
00361 *                 'A')
00362 *
00363                   WRKBL = N + LWORK_SGEQRF
00364                   WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
00365                   WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
00366                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
00367                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
00368                   WRKBL = MAX( WRKBL, BDSPAC )
00369                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
00370                   MINWRK = MAX( 3*N+M, BDSPAC )
00371                ELSE IF( WNTUS .AND. WNTVN ) THEN
00372 *
00373 *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
00374 *
00375                   WRKBL = N + LWORK_SGEQRF
00376                   WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
00377                   WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
00378                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
00379                   WRKBL = MAX( WRKBL, BDSPAC )
00380                   MAXWRK = N*N + WRKBL
00381                   MINWRK = MAX( 3*N+M, BDSPAC )
00382                ELSE IF( WNTUS .AND. WNTVO ) THEN
00383 *
00384 *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
00385 *
00386                   WRKBL = N + LWORK_SGEQRF
00387                   WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
00388                   WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
00389                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
00390                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
00391                   WRKBL = MAX( WRKBL, BDSPAC )
00392                   MAXWRK = 2*N*N + WRKBL
00393                   MINWRK = MAX( 3*N+M, BDSPAC )
00394                ELSE IF( WNTUS .AND. WNTVAS ) THEN
00395 *
00396 *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
00397 *                 'A')
00398 *
00399                   WRKBL = N + LWORK_SGEQRF
00400                   WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
00401                   WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
00402                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
00403                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
00404                   WRKBL = MAX( WRKBL, BDSPAC )
00405                   MAXWRK = N*N + WRKBL
00406                   MINWRK = MAX( 3*N+M, BDSPAC )
00407                ELSE IF( WNTUA .AND. WNTVN ) THEN
00408 *
00409 *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
00410 *
00411                   WRKBL = N + LWORK_SGEQRF
00412                   WRKBL = MAX( WRKBL, N+LWORK_SORGQR_M )
00413                   WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
00414                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
00415                   WRKBL = MAX( WRKBL, BDSPAC )
00416                   MAXWRK = N*N + WRKBL
00417                   MINWRK = MAX( 3*N+M, BDSPAC )
00418                ELSE IF( WNTUA .AND. WNTVO ) THEN
00419 *
00420 *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
00421 *
00422                   WRKBL = N + LWORK_SGEQRF
00423                   WRKBL = MAX( WRKBL, N+LWORK_SORGQR_M )
00424                   WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
00425                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
00426                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
00427                   WRKBL = MAX( WRKBL, BDSPAC )
00428                   MAXWRK = 2*N*N + WRKBL
00429                   MINWRK = MAX( 3*N+M, BDSPAC )
00430                ELSE IF( WNTUA .AND. WNTVAS ) THEN
00431 *
00432 *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
00433 *                 'A')
00434 *
00435                   WRKBL = N + LWORK_SGEQRF
00436                   WRKBL = MAX( WRKBL, N+LWORK_SORGQR_M )
00437                   WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
00438                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
00439                   WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
00440                   WRKBL = MAX( WRKBL, BDSPAC )
00441                   MAXWRK = N*N + WRKBL
00442                   MINWRK = MAX( 3*N+M, BDSPAC )
00443                END IF
00444             ELSE
00445 *
00446 *              Path 10 (M at least N, but not much larger)
00447 *
00448                CALL SGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
00449      $                   DUM(1), DUM(1), -1, IERR )
00450                LWORK_SGEBRD=DUM(1)
00451                MAXWRK = 3*N + LWORK_SGEBRD
00452                IF( WNTUS .OR. WNTUO ) THEN
00453                   CALL SORGBR( 'Q', M, N, N, A, LDA, DUM(1),
00454      $                   DUM(1), -1, IERR )
00455                   LWORK_SORGBR_Q=DUM(1)
00456                   MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_Q )
00457                END IF
00458                IF( WNTUA ) THEN
00459                   CALL SORGBR( 'Q', M, M, N, A, LDA, DUM(1),
00460      $                   DUM(1), -1, IERR )
00461                   LWORK_SORGBR_Q=DUM(1)
00462                   MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_Q )
00463                END IF
00464                IF( .NOT.WNTVN ) THEN
00465                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_P )
00466                END IF
00467                MAXWRK = MAX( MAXWRK, BDSPAC )
00468                MINWRK = MAX( 3*N+M, BDSPAC )
00469             END IF
00470          ELSE IF( MINMN.GT.0 ) THEN
00471 *
00472 *           Compute space needed for SBDSQR
00473 *
00474             MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
00475             BDSPAC = 5*M
00476 *           Compute space needed for SGELQF
00477             CALL SGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
00478             LWORK_SGELQF=DUM(1)
00479 *           Compute space needed for SORGLQ
00480             CALL SORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
00481             LWORK_SORGLQ_N=DUM(1)
00482             CALL SORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
00483             LWORK_SORGLQ_M=DUM(1)
00484 *           Compute space needed for SGEBRD
00485             CALL SGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
00486      $                   DUM(1), DUM(1), -1, IERR )
00487             LWORK_SGEBRD=DUM(1)
00488 *            Compute space needed for SORGBR P
00489             CALL SORGBR( 'P', M, M, M, A, N, DUM(1),
00490      $                   DUM(1), -1, IERR )
00491             LWORK_SORGBR_P=DUM(1)
00492 *           Compute space needed for SORGBR Q
00493             CALL SORGBR( 'Q', M, M, M, A, N, DUM(1),
00494      $                   DUM(1), -1, IERR )
00495             LWORK_SORGBR_Q=DUM(1)
00496             IF( N.GE.MNTHR ) THEN
00497                IF( WNTVN ) THEN
00498 *
00499 *                 Path 1t(N much larger than M, JOBVT='N')
00500 *
00501                   MAXWRK = M + LWORK_SGELQF
00502                   MAXWRK = MAX( MAXWRK, 3*M+LWORK_SGEBRD )
00503                   IF( WNTUO .OR. WNTUAS )
00504      $               MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_Q )
00505                   MAXWRK = MAX( MAXWRK, BDSPAC )
00506                   MINWRK = MAX( 4*M, BDSPAC )
00507                ELSE IF( WNTVO .AND. WNTUN ) THEN
00508 *
00509 *                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
00510 *
00511                   WRKBL = M + LWORK_SGELQF
00512                   WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
00513                   WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
00514                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
00515                   WRKBL = MAX( WRKBL, BDSPAC )
00516                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
00517                   MINWRK = MAX( 3*M+N, BDSPAC )
00518                ELSE IF( WNTVO .AND. WNTUAS ) THEN
00519 *
00520 *                 Path 3t(N much larger than M, JOBU='S' or 'A',
00521 *                 JOBVT='O')
00522 *
00523                   WRKBL = M + LWORK_SGELQF
00524                   WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
00525                   WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
00526                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
00527                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
00528                   WRKBL = MAX( WRKBL, BDSPAC )
00529                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
00530                   MINWRK = MAX( 3*M+N, BDSPAC )
00531                ELSE IF( WNTVS .AND. WNTUN ) THEN
00532 *
00533 *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
00534 *
00535                   WRKBL = M + LWORK_SGELQF
00536                   WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
00537                   WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
00538                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
00539                   WRKBL = MAX( WRKBL, BDSPAC )
00540                   MAXWRK = M*M + WRKBL
00541                   MINWRK = MAX( 3*M+N, BDSPAC )
00542                ELSE IF( WNTVS .AND. WNTUO ) THEN
00543 *
00544 *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
00545 *
00546                   WRKBL = M + LWORK_SGELQF
00547                   WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
00548                   WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
00549                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
00550                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
00551                   WRKBL = MAX( WRKBL, BDSPAC )
00552                   MAXWRK = 2*M*M + WRKBL
00553                   MINWRK = MAX( 3*M+N, BDSPAC )
00554                   MAXWRK = MAX( MAXWRK, MINWRK )
00555                ELSE IF( WNTVS .AND. WNTUAS ) THEN
00556 *
00557 *                 Path 6t(N much larger than M, JOBU='S' or 'A',
00558 *                 JOBVT='S')
00559 *
00560                   WRKBL = M + LWORK_SGELQF
00561                   WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
00562                   WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
00563                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
00564                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
00565                   WRKBL = MAX( WRKBL, BDSPAC )
00566                   MAXWRK = M*M + WRKBL
00567                   MINWRK = MAX( 3*M+N, BDSPAC )
00568                ELSE IF( WNTVA .AND. WNTUN ) THEN
00569 *
00570 *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
00571 *
00572                   WRKBL = M + LWORK_SGELQF
00573                   WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_N )
00574                   WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
00575                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
00576                   WRKBL = MAX( WRKBL, BDSPAC )
00577                   MAXWRK = M*M + WRKBL
00578                   MINWRK = MAX( 3*M+N, BDSPAC )
00579                ELSE IF( WNTVA .AND. WNTUO ) THEN
00580 *
00581 *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
00582 *
00583                   WRKBL = M + LWORK_SGELQF
00584                   WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_N )
00585                   WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
00586                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
00587                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
00588                   WRKBL = MAX( WRKBL, BDSPAC )
00589                   MAXWRK = 2*M*M + WRKBL
00590                   MINWRK = MAX( 3*M+N, BDSPAC )
00591                ELSE IF( WNTVA .AND. WNTUAS ) THEN
00592 *
00593 *                 Path 9t(N much larger than M, JOBU='S' or 'A',
00594 *                 JOBVT='A')
00595 *
00596                   WRKBL = M + LWORK_SGELQF
00597                   WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_N )
00598                   WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
00599                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
00600                   WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
00601                   WRKBL = MAX( WRKBL, BDSPAC )
00602                   MAXWRK = M*M + WRKBL
00603                   MINWRK = MAX( 3*M+N, BDSPAC )
00604                END IF
00605             ELSE
00606 *
00607 *              Path 10t(N greater than M, but not much larger)
00608 *
00609                CALL SGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
00610      $                   DUM(1), DUM(1), -1, IERR )
00611                LWORK_SGEBRD=DUM(1)
00612                MAXWRK = 3*M + LWORK_SGEBRD
00613                IF( WNTVS .OR. WNTVO ) THEN
00614 *                Compute space needed for SORGBR P
00615                  CALL SORGBR( 'P', M, N, M, A, N, DUM(1),
00616      $                   DUM(1), -1, IERR )
00617                  LWORK_SORGBR_P=DUM(1)
00618                  MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_P )
00619                END IF
00620                IF( WNTVA ) THEN
00621                  CALL SORGBR( 'P', N, N, M, A, N, DUM(1),
00622      $                   DUM(1), -1, IERR )
00623                  LWORK_SORGBR_P=DUM(1)
00624                  MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_P )
00625                END IF
00626                IF( .NOT.WNTUN ) THEN
00627                   MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_Q )
00628                END IF
00629                MAXWRK = MAX( MAXWRK, BDSPAC )
00630                MINWRK = MAX( 3*M+N, BDSPAC )
00631             END IF
00632          END IF
00633          MAXWRK = MAX( MAXWRK, MINWRK )
00634          WORK( 1 ) = MAXWRK
00635 *
00636          IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
00637             INFO = -13
00638          END IF
00639       END IF
00640 *
00641       IF( INFO.NE.0 ) THEN
00642          CALL XERBLA( 'SGESVD', -INFO )
00643          RETURN
00644       ELSE IF( LQUERY ) THEN
00645          RETURN
00646       END IF
00647 *
00648 *     Quick return if possible
00649 *
00650       IF( M.EQ.0 .OR. N.EQ.0 ) THEN
00651          RETURN
00652       END IF
00653 *
00654 *     Get machine constants
00655 *
00656       EPS = SLAMCH( 'P' )
00657       SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
00658       BIGNUM = ONE / SMLNUM
00659 *
00660 *     Scale A if max element outside range [SMLNUM,BIGNUM]
00661 *
00662       ANRM = SLANGE( 'M', M, N, A, LDA, DUM )
00663       ISCL = 0
00664       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
00665          ISCL = 1
00666          CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
00667       ELSE IF( ANRM.GT.BIGNUM ) THEN
00668          ISCL = 1
00669          CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
00670       END IF
00671 *
00672       IF( M.GE.N ) THEN
00673 *
00674 *        A has at least as many rows as columns. If A has sufficiently
00675 *        more rows than columns, first reduce using the QR
00676 *        decomposition (if sufficient workspace available)
00677 *
00678          IF( M.GE.MNTHR ) THEN
00679 *
00680             IF( WNTUN ) THEN
00681 *
00682 *              Path 1 (M much larger than N, JOBU='N')
00683 *              No left singular vectors to be computed
00684 *
00685                ITAU = 1
00686                IWORK = ITAU + N
00687 *
00688 *              Compute A=Q*R
00689 *              (Workspace: need 2*N, prefer N+N*NB)
00690 *
00691                CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
00692      $                      LWORK-IWORK+1, IERR )
00693 *
00694 *              Zero out below R
00695 *
00696                CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
00697                IE = 1
00698                ITAUQ = IE + N
00699                ITAUP = ITAUQ + N
00700                IWORK = ITAUP + N
00701 *
00702 *              Bidiagonalize R in A
00703 *              (Workspace: need 4*N, prefer 3*N+2*N*NB)
00704 *
00705                CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
00706      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
00707      $                      IERR )
00708                NCVT = 0
00709                IF( WNTVO .OR. WNTVAS ) THEN
00710 *
00711 *                 If right singular vectors desired, generate P'.
00712 *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
00713 *
00714                   CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
00715      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00716                   NCVT = N
00717                END IF
00718                IWORK = IE + N
00719 *
00720 *              Perform bidiagonal QR iteration, computing right
00721 *              singular vectors of A in A if desired
00722 *              (Workspace: need BDSPAC)
00723 *
00724                CALL SBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
00725      $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
00726 *
00727 *              If right singular vectors desired in VT, copy them there
00728 *
00729                IF( WNTVAS )
00730      $            CALL SLACPY( 'F', N, N, A, LDA, VT, LDVT )
00731 *
00732             ELSE IF( WNTUO .AND. WNTVN ) THEN
00733 *
00734 *              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
00735 *              N left singular vectors to be overwritten on A and
00736 *              no right singular vectors to be computed
00737 *
00738                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
00739 *
00740 *                 Sufficient workspace for a fast algorithm
00741 *
00742                   IR = 1
00743                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
00744 *
00745 *                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
00746 *
00747                      LDWRKU = LDA
00748                      LDWRKR = LDA
00749                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
00750 *
00751 *                    WORK(IU) is LDA by N, WORK(IR) is N by N
00752 *
00753                      LDWRKU = LDA
00754                      LDWRKR = N
00755                   ELSE
00756 *
00757 *                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
00758 *
00759                      LDWRKU = ( LWORK-N*N-N ) / N
00760                      LDWRKR = N
00761                   END IF
00762                   ITAU = IR + LDWRKR*N
00763                   IWORK = ITAU + N
00764 *
00765 *                 Compute A=Q*R
00766 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00767 *
00768                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
00769      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00770 *
00771 *                 Copy R to WORK(IR) and zero out below it
00772 *
00773                   CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
00774                   CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
00775      $                         LDWRKR )
00776 *
00777 *                 Generate Q in A
00778 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00779 *
00780                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
00781      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00782                   IE = ITAU
00783                   ITAUQ = IE + N
00784                   ITAUP = ITAUQ + N
00785                   IWORK = ITAUP + N
00786 *
00787 *                 Bidiagonalize R in WORK(IR)
00788 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
00789 *
00790                   CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
00791      $                         WORK( ITAUQ ), WORK( ITAUP ),
00792      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00793 *
00794 *                 Generate left vectors bidiagonalizing R
00795 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
00796 *
00797                   CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
00798      $                         WORK( ITAUQ ), WORK( IWORK ),
00799      $                         LWORK-IWORK+1, IERR )
00800                   IWORK = IE + N
00801 *
00802 *                 Perform bidiagonal QR iteration, computing left
00803 *                 singular vectors of R in WORK(IR)
00804 *                 (Workspace: need N*N+BDSPAC)
00805 *
00806                   CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
00807      $                         WORK( IR ), LDWRKR, DUM, 1,
00808      $                         WORK( IWORK ), INFO )
00809                   IU = IE + N
00810 *
00811 *                 Multiply Q in A by left singular vectors of R in
00812 *                 WORK(IR), storing result in WORK(IU) and copying to A
00813 *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
00814 *
00815                   DO 10 I = 1, M, LDWRKU
00816                      CHUNK = MIN( M-I+1, LDWRKU )
00817                      CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
00818      $                           LDA, WORK( IR ), LDWRKR, ZERO,
00819      $                           WORK( IU ), LDWRKU )
00820                      CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
00821      $                            A( I, 1 ), LDA )
00822    10             CONTINUE
00823 *
00824                ELSE
00825 *
00826 *                 Insufficient workspace for a fast algorithm
00827 *
00828                   IE = 1
00829                   ITAUQ = IE + N
00830                   ITAUP = ITAUQ + N
00831                   IWORK = ITAUP + N
00832 *
00833 *                 Bidiagonalize A
00834 *                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
00835 *
00836                   CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
00837      $                         WORK( ITAUQ ), WORK( ITAUP ),
00838      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00839 *
00840 *                 Generate left vectors bidiagonalizing A
00841 *                 (Workspace: need 4*N, prefer 3*N+N*NB)
00842 *
00843                   CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
00844      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00845                   IWORK = IE + N
00846 *
00847 *                 Perform bidiagonal QR iteration, computing left
00848 *                 singular vectors of A in A
00849 *                 (Workspace: need BDSPAC)
00850 *
00851                   CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
00852      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
00853 *
00854                END IF
00855 *
00856             ELSE IF( WNTUO .AND. WNTVAS ) THEN
00857 *
00858 *              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
00859 *              N left singular vectors to be overwritten on A and
00860 *              N right singular vectors to be computed in VT
00861 *
00862                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
00863 *
00864 *                 Sufficient workspace for a fast algorithm
00865 *
00866                   IR = 1
00867                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
00868 *
00869 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
00870 *
00871                      LDWRKU = LDA
00872                      LDWRKR = LDA
00873                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
00874 *
00875 *                    WORK(IU) is LDA by N and WORK(IR) is N by N
00876 *
00877                      LDWRKU = LDA
00878                      LDWRKR = N
00879                   ELSE
00880 *
00881 *                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
00882 *
00883                      LDWRKU = ( LWORK-N*N-N ) / N
00884                      LDWRKR = N
00885                   END IF
00886                   ITAU = IR + LDWRKR*N
00887                   IWORK = ITAU + N
00888 *
00889 *                 Compute A=Q*R
00890 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00891 *
00892                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
00893      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00894 *
00895 *                 Copy R to VT, zeroing out below it
00896 *
00897                   CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
00898                   IF( N.GT.1 )
00899      $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
00900      $                            VT( 2, 1 ), LDVT )
00901 *
00902 *                 Generate Q in A
00903 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00904 *
00905                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
00906      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00907                   IE = ITAU
00908                   ITAUQ = IE + N
00909                   ITAUP = ITAUQ + N
00910                   IWORK = ITAUP + N
00911 *
00912 *                 Bidiagonalize R in VT, copying result to WORK(IR)
00913 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
00914 *
00915                   CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
00916      $                         WORK( ITAUQ ), WORK( ITAUP ),
00917      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00918                   CALL SLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
00919 *
00920 *                 Generate left vectors bidiagonalizing R in WORK(IR)
00921 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
00922 *
00923                   CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
00924      $                         WORK( ITAUQ ), WORK( IWORK ),
00925      $                         LWORK-IWORK+1, IERR )
00926 *
00927 *                 Generate right vectors bidiagonalizing R in VT
00928 *                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
00929 *
00930                   CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
00931      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00932                   IWORK = IE + N
00933 *
00934 *                 Perform bidiagonal QR iteration, computing left
00935 *                 singular vectors of R in WORK(IR) and computing right
00936 *                 singular vectors of R in VT
00937 *                 (Workspace: need N*N+BDSPAC)
00938 *
00939                   CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
00940      $                         WORK( IR ), LDWRKR, DUM, 1,
00941      $                         WORK( IWORK ), INFO )
00942                   IU = IE + N
00943 *
00944 *                 Multiply Q in A by left singular vectors of R in
00945 *                 WORK(IR), storing result in WORK(IU) and copying to A
00946 *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
00947 *
00948                   DO 20 I = 1, M, LDWRKU
00949                      CHUNK = MIN( M-I+1, LDWRKU )
00950                      CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
00951      $                           LDA, WORK( IR ), LDWRKR, ZERO,
00952      $                           WORK( IU ), LDWRKU )
00953                      CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
00954      $                            A( I, 1 ), LDA )
00955    20             CONTINUE
00956 *
00957                ELSE
00958 *
00959 *                 Insufficient workspace for a fast algorithm
00960 *
00961                   ITAU = 1
00962                   IWORK = ITAU + N
00963 *
00964 *                 Compute A=Q*R
00965 *                 (Workspace: need 2*N, prefer N+N*NB)
00966 *
00967                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
00968      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00969 *
00970 *                 Copy R to VT, zeroing out below it
00971 *
00972                   CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
00973                   IF( N.GT.1 )
00974      $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
00975      $                            VT( 2, 1 ), LDVT )
00976 *
00977 *                 Generate Q in A
00978 *                 (Workspace: need 2*N, prefer N+N*NB)
00979 *
00980                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
00981      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00982                   IE = ITAU
00983                   ITAUQ = IE + N
00984                   ITAUP = ITAUQ + N
00985                   IWORK = ITAUP + N
00986 *
00987 *                 Bidiagonalize R in VT
00988 *                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
00989 *
00990                   CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
00991      $                         WORK( ITAUQ ), WORK( ITAUP ),
00992      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00993 *
00994 *                 Multiply Q in A by left vectors bidiagonalizing R
00995 *                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
00996 *
00997                   CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
00998      $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
00999      $                         LWORK-IWORK+1, IERR )
01000 *
01001 *                 Generate right vectors bidiagonalizing R in VT
01002 *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01003 *
01004                   CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01005      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
01006                   IWORK = IE + N
01007 *
01008 *                 Perform bidiagonal QR iteration, computing left
01009 *                 singular vectors of A in A and computing right
01010 *                 singular vectors of A in VT
01011 *                 (Workspace: need BDSPAC)
01012 *
01013                   CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
01014      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
01015 *
01016                END IF
01017 *
01018             ELSE IF( WNTUS ) THEN
01019 *
01020                IF( WNTVN ) THEN
01021 *
01022 *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
01023 *                 N left singular vectors to be computed in U and
01024 *                 no right singular vectors to be computed
01025 *
01026                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
01027 *
01028 *                    Sufficient workspace for a fast algorithm
01029 *
01030                      IR = 1
01031                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
01032 *
01033 *                       WORK(IR) is LDA by N
01034 *
01035                         LDWRKR = LDA
01036                      ELSE
01037 *
01038 *                       WORK(IR) is N by N
01039 *
01040                         LDWRKR = N
01041                      END IF
01042                      ITAU = IR + LDWRKR*N
01043                      IWORK = ITAU + N
01044 *
01045 *                    Compute A=Q*R
01046 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01047 *
01048                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01049      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01050 *
01051 *                    Copy R to WORK(IR), zeroing out below it
01052 *
01053                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
01054      $                            LDWRKR )
01055                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01056      $                            WORK( IR+1 ), LDWRKR )
01057 *
01058 *                    Generate Q in A
01059 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01060 *
01061                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
01062      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01063                      IE = ITAU
01064                      ITAUQ = IE + N
01065                      ITAUP = ITAUQ + N
01066                      IWORK = ITAUP + N
01067 *
01068 *                    Bidiagonalize R in WORK(IR)
01069 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
01070 *
01071                      CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
01072      $                            WORK( IE ), WORK( ITAUQ ),
01073      $                            WORK( ITAUP ), WORK( IWORK ),
01074      $                            LWORK-IWORK+1, IERR )
01075 *
01076 *                    Generate left vectors bidiagonalizing R in WORK(IR)
01077 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
01078 *
01079                      CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
01080      $                            WORK( ITAUQ ), WORK( IWORK ),
01081      $                            LWORK-IWORK+1, IERR )
01082                      IWORK = IE + N
01083 *
01084 *                    Perform bidiagonal QR iteration, computing left
01085 *                    singular vectors of R in WORK(IR)
01086 *                    (Workspace: need N*N+BDSPAC)
01087 *
01088                      CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
01089      $                            1, WORK( IR ), LDWRKR, DUM, 1,
01090      $                            WORK( IWORK ), INFO )
01091 *
01092 *                    Multiply Q in A by left singular vectors of R in
01093 *                    WORK(IR), storing result in U
01094 *                    (Workspace: need N*N)
01095 *
01096                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
01097      $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
01098 *
01099                   ELSE
01100 *
01101 *                    Insufficient workspace for a fast algorithm
01102 *
01103                      ITAU = 1
01104                      IWORK = ITAU + N
01105 *
01106 *                    Compute A=Q*R, copying result to U
01107 *                    (Workspace: need 2*N, prefer N+N*NB)
01108 *
01109                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01110      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01111                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01112 *
01113 *                    Generate Q in U
01114 *                    (Workspace: need 2*N, prefer N+N*NB)
01115 *
01116                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
01117      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01118                      IE = ITAU
01119                      ITAUQ = IE + N
01120                      ITAUP = ITAUQ + N
01121                      IWORK = ITAUP + N
01122 *
01123 *                    Zero out below R in A
01124 *
01125                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
01126      $                            LDA )
01127 *
01128 *                    Bidiagonalize R in A
01129 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01130 *
01131                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
01132      $                            WORK( ITAUQ ), WORK( ITAUP ),
01133      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01134 *
01135 *                    Multiply Q in U by left vectors bidiagonalizing R
01136 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01137 *
01138                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
01139      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01140      $                            LWORK-IWORK+1, IERR )
01141                      IWORK = IE + N
01142 *
01143 *                    Perform bidiagonal QR iteration, computing left
01144 *                    singular vectors of A in U
01145 *                    (Workspace: need BDSPAC)
01146 *
01147                      CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
01148      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
01149      $                            INFO )
01150 *
01151                   END IF
01152 *
01153                ELSE IF( WNTVO ) THEN
01154 *
01155 *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
01156 *                 N left singular vectors to be computed in U and
01157 *                 N right singular vectors to be overwritten on A
01158 *
01159                   IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
01160 *
01161 *                    Sufficient workspace for a fast algorithm
01162 *
01163                      IU = 1
01164                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
01165 *
01166 *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
01167 *
01168                         LDWRKU = LDA
01169                         IR = IU + LDWRKU*N
01170                         LDWRKR = LDA
01171                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
01172 *
01173 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
01174 *
01175                         LDWRKU = LDA
01176                         IR = IU + LDWRKU*N
01177                         LDWRKR = N
01178                      ELSE
01179 *
01180 *                       WORK(IU) is N by N and WORK(IR) is N by N
01181 *
01182                         LDWRKU = N
01183                         IR = IU + LDWRKU*N
01184                         LDWRKR = N
01185                      END IF
01186                      ITAU = IR + LDWRKR*N
01187                      IWORK = ITAU + N
01188 *
01189 *                    Compute A=Q*R
01190 *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
01191 *
01192                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01193      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01194 *
01195 *                    Copy R to WORK(IU), zeroing out below it
01196 *
01197                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
01198      $                            LDWRKU )
01199                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01200      $                            WORK( IU+1 ), LDWRKU )
01201 *
01202 *                    Generate Q in A
01203 *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
01204 *
01205                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
01206      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01207                      IE = ITAU
01208                      ITAUQ = IE + N
01209                      ITAUP = ITAUQ + N
01210                      IWORK = ITAUP + N
01211 *
01212 *                    Bidiagonalize R in WORK(IU), copying result to
01213 *                    WORK(IR)
01214 *                    (Workspace: need 2*N*N+4*N,
01215 *                                prefer 2*N*N+3*N+2*N*NB)
01216 *
01217                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
01218      $                            WORK( IE ), WORK( ITAUQ ),
01219      $                            WORK( ITAUP ), WORK( IWORK ),
01220      $                            LWORK-IWORK+1, IERR )
01221                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
01222      $                            WORK( IR ), LDWRKR )
01223 *
01224 *                    Generate left bidiagonalizing vectors in WORK(IU)
01225 *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
01226 *
01227                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
01228      $                            WORK( ITAUQ ), WORK( IWORK ),
01229      $                            LWORK-IWORK+1, IERR )
01230 *
01231 *                    Generate right bidiagonalizing vectors in WORK(IR)
01232 *                    (Workspace: need 2*N*N+4*N-1,
01233 *                                prefer 2*N*N+3*N+(N-1)*NB)
01234 *
01235                      CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
01236      $                            WORK( ITAUP ), WORK( IWORK ),
01237      $                            LWORK-IWORK+1, IERR )
01238                      IWORK = IE + N
01239 *
01240 *                    Perform bidiagonal QR iteration, computing left
01241 *                    singular vectors of R in WORK(IU) and computing
01242 *                    right singular vectors of R in WORK(IR)
01243 *                    (Workspace: need 2*N*N+BDSPAC)
01244 *
01245                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
01246      $                            WORK( IR ), LDWRKR, WORK( IU ),
01247      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
01248 *
01249 *                    Multiply Q in A by left singular vectors of R in
01250 *                    WORK(IU), storing result in U
01251 *                    (Workspace: need N*N)
01252 *
01253                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
01254      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
01255 *
01256 *                    Copy right singular vectors of R to A
01257 *                    (Workspace: need N*N)
01258 *
01259                      CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
01260      $                            LDA )
01261 *
01262                   ELSE
01263 *
01264 *                    Insufficient workspace for a fast algorithm
01265 *
01266                      ITAU = 1
01267                      IWORK = ITAU + N
01268 *
01269 *                    Compute A=Q*R, copying result to U
01270 *                    (Workspace: need 2*N, prefer N+N*NB)
01271 *
01272                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01273      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01274                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01275 *
01276 *                    Generate Q in U
01277 *                    (Workspace: need 2*N, prefer N+N*NB)
01278 *
01279                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
01280      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01281                      IE = ITAU
01282                      ITAUQ = IE + N
01283                      ITAUP = ITAUQ + N
01284                      IWORK = ITAUP + N
01285 *
01286 *                    Zero out below R in A
01287 *
01288                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
01289      $                            LDA )
01290 *
01291 *                    Bidiagonalize R in A
01292 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01293 *
01294                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
01295      $                            WORK( ITAUQ ), WORK( ITAUP ),
01296      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01297 *
01298 *                    Multiply Q in U by left vectors bidiagonalizing R
01299 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01300 *
01301                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
01302      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01303      $                            LWORK-IWORK+1, IERR )
01304 *
01305 *                    Generate right vectors bidiagonalizing R in A
01306 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01307 *
01308                      CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
01309      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01310                      IWORK = IE + N
01311 *
01312 *                    Perform bidiagonal QR iteration, computing left
01313 *                    singular vectors of A in U and computing right
01314 *                    singular vectors of A in A
01315 *                    (Workspace: need BDSPAC)
01316 *
01317                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
01318      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
01319      $                            INFO )
01320 *
01321                   END IF
01322 *
01323                ELSE IF( WNTVAS ) THEN
01324 *
01325 *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
01326 *                         or 'A')
01327 *                 N left singular vectors to be computed in U and
01328 *                 N right singular vectors to be computed in VT
01329 *
01330                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
01331 *
01332 *                    Sufficient workspace for a fast algorithm
01333 *
01334                      IU = 1
01335                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
01336 *
01337 *                       WORK(IU) is LDA by N
01338 *
01339                         LDWRKU = LDA
01340                      ELSE
01341 *
01342 *                       WORK(IU) is N by N
01343 *
01344                         LDWRKU = N
01345                      END IF
01346                      ITAU = IU + LDWRKU*N
01347                      IWORK = ITAU + N
01348 *
01349 *                    Compute A=Q*R
01350 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01351 *
01352                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01353      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01354 *
01355 *                    Copy R to WORK(IU), zeroing out below it
01356 *
01357                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
01358      $                            LDWRKU )
01359                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01360      $                            WORK( IU+1 ), LDWRKU )
01361 *
01362 *                    Generate Q in A
01363 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01364 *
01365                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
01366      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01367                      IE = ITAU
01368                      ITAUQ = IE + N
01369                      ITAUP = ITAUQ + N
01370                      IWORK = ITAUP + N
01371 *
01372 *                    Bidiagonalize R in WORK(IU), copying result to VT
01373 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
01374 *
01375                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
01376      $                            WORK( IE ), WORK( ITAUQ ),
01377      $                            WORK( ITAUP ), WORK( IWORK ),
01378      $                            LWORK-IWORK+1, IERR )
01379                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
01380      $                            LDVT )
01381 *
01382 *                    Generate left bidiagonalizing vectors in WORK(IU)
01383 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
01384 *
01385                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
01386      $                            WORK( ITAUQ ), WORK( IWORK ),
01387      $                            LWORK-IWORK+1, IERR )
01388 *
01389 *                    Generate right bidiagonalizing vectors in VT
01390 *                    (Workspace: need N*N+4*N-1,
01391 *                                prefer N*N+3*N+(N-1)*NB)
01392 *
01393                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01394      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01395                      IWORK = IE + N
01396 *
01397 *                    Perform bidiagonal QR iteration, computing left
01398 *                    singular vectors of R in WORK(IU) and computing
01399 *                    right singular vectors of R in VT
01400 *                    (Workspace: need N*N+BDSPAC)
01401 *
01402                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
01403      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
01404      $                            WORK( IWORK ), INFO )
01405 *
01406 *                    Multiply Q in A by left singular vectors of R in
01407 *                    WORK(IU), storing result in U
01408 *                    (Workspace: need N*N)
01409 *
01410                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
01411      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
01412 *
01413                   ELSE
01414 *
01415 *                    Insufficient workspace for a fast algorithm
01416 *
01417                      ITAU = 1
01418                      IWORK = ITAU + N
01419 *
01420 *                    Compute A=Q*R, copying result to U
01421 *                    (Workspace: need 2*N, prefer N+N*NB)
01422 *
01423                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01424      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01425                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01426 *
01427 *                    Generate Q in U
01428 *                    (Workspace: need 2*N, prefer N+N*NB)
01429 *
01430                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
01431      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01432 *
01433 *                    Copy R to VT, zeroing out below it
01434 *
01435                      CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
01436                      IF( N.GT.1 )
01437      $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01438      $                               VT( 2, 1 ), LDVT )
01439                      IE = ITAU
01440                      ITAUQ = IE + N
01441                      ITAUP = ITAUQ + N
01442                      IWORK = ITAUP + N
01443 *
01444 *                    Bidiagonalize R in VT
01445 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01446 *
01447                      CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
01448      $                            WORK( ITAUQ ), WORK( ITAUP ),
01449      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01450 *
01451 *                    Multiply Q in U by left bidiagonalizing vectors
01452 *                    in VT
01453 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01454 *
01455                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
01456      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01457      $                            LWORK-IWORK+1, IERR )
01458 *
01459 *                    Generate right bidiagonalizing vectors in VT
01460 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01461 *
01462                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01463      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01464                      IWORK = IE + N
01465 *
01466 *                    Perform bidiagonal QR iteration, computing left
01467 *                    singular vectors of A in U and computing right
01468 *                    singular vectors of A in VT
01469 *                    (Workspace: need BDSPAC)
01470 *
01471                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
01472      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
01473      $                            INFO )
01474 *
01475                   END IF
01476 *
01477                END IF
01478 *
01479             ELSE IF( WNTUA ) THEN
01480 *
01481                IF( WNTVN ) THEN
01482 *
01483 *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
01484 *                 M left singular vectors to be computed in U and
01485 *                 no right singular vectors to be computed
01486 *
01487                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
01488 *
01489 *                    Sufficient workspace for a fast algorithm
01490 *
01491                      IR = 1
01492                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
01493 *
01494 *                       WORK(IR) is LDA by N
01495 *
01496                         LDWRKR = LDA
01497                      ELSE
01498 *
01499 *                       WORK(IR) is N by N
01500 *
01501                         LDWRKR = N
01502                      END IF
01503                      ITAU = IR + LDWRKR*N
01504                      IWORK = ITAU + N
01505 *
01506 *                    Compute A=Q*R, copying result to U
01507 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01508 *
01509                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01510      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01511                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01512 *
01513 *                    Copy R to WORK(IR), zeroing out below it
01514 *
01515                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
01516      $                            LDWRKR )
01517                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01518      $                            WORK( IR+1 ), LDWRKR )
01519 *
01520 *                    Generate Q in U
01521 *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
01522 *
01523                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01524      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01525                      IE = ITAU
01526                      ITAUQ = IE + N
01527                      ITAUP = ITAUQ + N
01528                      IWORK = ITAUP + N
01529 *
01530 *                    Bidiagonalize R in WORK(IR)
01531 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
01532 *
01533                      CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
01534      $                            WORK( IE ), WORK( ITAUQ ),
01535      $                            WORK( ITAUP ), WORK( IWORK ),
01536      $                            LWORK-IWORK+1, IERR )
01537 *
01538 *                    Generate left bidiagonalizing vectors in WORK(IR)
01539 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
01540 *
01541                      CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
01542      $                            WORK( ITAUQ ), WORK( IWORK ),
01543      $                            LWORK-IWORK+1, IERR )
01544                      IWORK = IE + N
01545 *
01546 *                    Perform bidiagonal QR iteration, computing left
01547 *                    singular vectors of R in WORK(IR)
01548 *                    (Workspace: need N*N+BDSPAC)
01549 *
01550                      CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
01551      $                            1, WORK( IR ), LDWRKR, DUM, 1,
01552      $                            WORK( IWORK ), INFO )
01553 *
01554 *                    Multiply Q in U by left singular vectors of R in
01555 *                    WORK(IR), storing result in A
01556 *                    (Workspace: need N*N)
01557 *
01558                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
01559      $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
01560 *
01561 *                    Copy left singular vectors of A from A to U
01562 *
01563                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
01564 *
01565                   ELSE
01566 *
01567 *                    Insufficient workspace for a fast algorithm
01568 *
01569                      ITAU = 1
01570                      IWORK = ITAU + N
01571 *
01572 *                    Compute A=Q*R, copying result to U
01573 *                    (Workspace: need 2*N, prefer N+N*NB)
01574 *
01575                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01576      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01577                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01578 *
01579 *                    Generate Q in U
01580 *                    (Workspace: need N+M, prefer N+M*NB)
01581 *
01582                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01583      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01584                      IE = ITAU
01585                      ITAUQ = IE + N
01586                      ITAUP = ITAUQ + N
01587                      IWORK = ITAUP + N
01588 *
01589 *                    Zero out below R in A
01590 *
01591                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
01592      $                            LDA )
01593 *
01594 *                    Bidiagonalize R in A
01595 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01596 *
01597                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
01598      $                            WORK( ITAUQ ), WORK( ITAUP ),
01599      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01600 *
01601 *                    Multiply Q in U by left bidiagonalizing vectors
01602 *                    in A
01603 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01604 *
01605                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
01606      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01607      $                            LWORK-IWORK+1, IERR )
01608                      IWORK = IE + N
01609 *
01610 *                    Perform bidiagonal QR iteration, computing left
01611 *                    singular vectors of A in U
01612 *                    (Workspace: need BDSPAC)
01613 *
01614                      CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
01615      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
01616      $                            INFO )
01617 *
01618                   END IF
01619 *
01620                ELSE IF( WNTVO ) THEN
01621 *
01622 *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
01623 *                 M left singular vectors to be computed in U and
01624 *                 N right singular vectors to be overwritten on A
01625 *
01626                   IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
01627 *
01628 *                    Sufficient workspace for a fast algorithm
01629 *
01630                      IU = 1
01631                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
01632 *
01633 *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
01634 *
01635                         LDWRKU = LDA
01636                         IR = IU + LDWRKU*N
01637                         LDWRKR = LDA
01638                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
01639 *
01640 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
01641 *
01642                         LDWRKU = LDA
01643                         IR = IU + LDWRKU*N
01644                         LDWRKR = N
01645                      ELSE
01646 *
01647 *                       WORK(IU) is N by N and WORK(IR) is N by N
01648 *
01649                         LDWRKU = N
01650                         IR = IU + LDWRKU*N
01651                         LDWRKR = N
01652                      END IF
01653                      ITAU = IR + LDWRKR*N
01654                      IWORK = ITAU + N
01655 *
01656 *                    Compute A=Q*R, copying result to U
01657 *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
01658 *
01659                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01660      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01661                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01662 *
01663 *                    Generate Q in U
01664 *                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
01665 *
01666                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01667      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01668 *
01669 *                    Copy R to WORK(IU), zeroing out below it
01670 *
01671                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
01672      $                            LDWRKU )
01673                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01674      $                            WORK( IU+1 ), LDWRKU )
01675                      IE = ITAU
01676                      ITAUQ = IE + N
01677                      ITAUP = ITAUQ + N
01678                      IWORK = ITAUP + N
01679 *
01680 *                    Bidiagonalize R in WORK(IU), copying result to
01681 *                    WORK(IR)
01682 *                    (Workspace: need 2*N*N+4*N,
01683 *                                prefer 2*N*N+3*N+2*N*NB)
01684 *
01685                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
01686      $                            WORK( IE ), WORK( ITAUQ ),
01687      $                            WORK( ITAUP ), WORK( IWORK ),
01688      $                            LWORK-IWORK+1, IERR )
01689                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
01690      $                            WORK( IR ), LDWRKR )
01691 *
01692 *                    Generate left bidiagonalizing vectors in WORK(IU)
01693 *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
01694 *
01695                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
01696      $                            WORK( ITAUQ ), WORK( IWORK ),
01697      $                            LWORK-IWORK+1, IERR )
01698 *
01699 *                    Generate right bidiagonalizing vectors in WORK(IR)
01700 *                    (Workspace: need 2*N*N+4*N-1,
01701 *                                prefer 2*N*N+3*N+(N-1)*NB)
01702 *
01703                      CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
01704      $                            WORK( ITAUP ), WORK( IWORK ),
01705      $                            LWORK-IWORK+1, IERR )
01706                      IWORK = IE + N
01707 *
01708 *                    Perform bidiagonal QR iteration, computing left
01709 *                    singular vectors of R in WORK(IU) and computing
01710 *                    right singular vectors of R in WORK(IR)
01711 *                    (Workspace: need 2*N*N+BDSPAC)
01712 *
01713                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
01714      $                            WORK( IR ), LDWRKR, WORK( IU ),
01715      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
01716 *
01717 *                    Multiply Q in U by left singular vectors of R in
01718 *                    WORK(IU), storing result in A
01719 *                    (Workspace: need N*N)
01720 *
01721                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
01722      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
01723 *
01724 *                    Copy left singular vectors of A from A to U
01725 *
01726                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
01727 *
01728 *                    Copy right singular vectors of R from WORK(IR) to A
01729 *
01730                      CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
01731      $                            LDA )
01732 *
01733                   ELSE
01734 *
01735 *                    Insufficient workspace for a fast algorithm
01736 *
01737                      ITAU = 1
01738                      IWORK = ITAU + N
01739 *
01740 *                    Compute A=Q*R, copying result to U
01741 *                    (Workspace: need 2*N, prefer N+N*NB)
01742 *
01743                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01744      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01745                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01746 *
01747 *                    Generate Q in U
01748 *                    (Workspace: need N+M, prefer N+M*NB)
01749 *
01750                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01751      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01752                      IE = ITAU
01753                      ITAUQ = IE + N
01754                      ITAUP = ITAUQ + N
01755                      IWORK = ITAUP + N
01756 *
01757 *                    Zero out below R in A
01758 *
01759                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
01760      $                            LDA )
01761 *
01762 *                    Bidiagonalize R in A
01763 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01764 *
01765                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
01766      $                            WORK( ITAUQ ), WORK( ITAUP ),
01767      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01768 *
01769 *                    Multiply Q in U by left bidiagonalizing vectors
01770 *                    in A
01771 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01772 *
01773                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
01774      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01775      $                            LWORK-IWORK+1, IERR )
01776 *
01777 *                    Generate right bidiagonalizing vectors in A
01778 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01779 *
01780                      CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
01781      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01782                      IWORK = IE + N
01783 *
01784 *                    Perform bidiagonal QR iteration, computing left
01785 *                    singular vectors of A in U and computing right
01786 *                    singular vectors of A in A
01787 *                    (Workspace: need BDSPAC)
01788 *
01789                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
01790      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
01791      $                            INFO )
01792 *
01793                   END IF
01794 *
01795                ELSE IF( WNTVAS ) THEN
01796 *
01797 *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
01798 *                         or 'A')
01799 *                 M left singular vectors to be computed in U and
01800 *                 N right singular vectors to be computed in VT
01801 *
01802                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
01803 *
01804 *                    Sufficient workspace for a fast algorithm
01805 *
01806                      IU = 1
01807                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
01808 *
01809 *                       WORK(IU) is LDA by N
01810 *
01811                         LDWRKU = LDA
01812                      ELSE
01813 *
01814 *                       WORK(IU) is N by N
01815 *
01816                         LDWRKU = N
01817                      END IF
01818                      ITAU = IU + LDWRKU*N
01819                      IWORK = ITAU + N
01820 *
01821 *                    Compute A=Q*R, copying result to U
01822 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01823 *
01824                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01825      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01826                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01827 *
01828 *                    Generate Q in U
01829 *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
01830 *
01831                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01832      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01833 *
01834 *                    Copy R to WORK(IU), zeroing out below it
01835 *
01836                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
01837      $                            LDWRKU )
01838                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01839      $                            WORK( IU+1 ), LDWRKU )
01840                      IE = ITAU
01841                      ITAUQ = IE + N
01842                      ITAUP = ITAUQ + N
01843                      IWORK = ITAUP + N
01844 *
01845 *                    Bidiagonalize R in WORK(IU), copying result to VT
01846 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
01847 *
01848                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
01849      $                            WORK( IE ), WORK( ITAUQ ),
01850      $                            WORK( ITAUP ), WORK( IWORK ),
01851      $                            LWORK-IWORK+1, IERR )
01852                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
01853      $                            LDVT )
01854 *
01855 *                    Generate left bidiagonalizing vectors in WORK(IU)
01856 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
01857 *
01858                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
01859      $                            WORK( ITAUQ ), WORK( IWORK ),
01860      $                            LWORK-IWORK+1, IERR )
01861 *
01862 *                    Generate right bidiagonalizing vectors in VT
01863 *                    (Workspace: need N*N+4*N-1,
01864 *                                prefer N*N+3*N+(N-1)*NB)
01865 *
01866                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01867      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01868                      IWORK = IE + N
01869 *
01870 *                    Perform bidiagonal QR iteration, computing left
01871 *                    singular vectors of R in WORK(IU) and computing
01872 *                    right singular vectors of R in VT
01873 *                    (Workspace: need N*N+BDSPAC)
01874 *
01875                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
01876      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
01877      $                            WORK( IWORK ), INFO )
01878 *
01879 *                    Multiply Q in U by left singular vectors of R in
01880 *                    WORK(IU), storing result in A
01881 *                    (Workspace: need N*N)
01882 *
01883                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
01884      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
01885 *
01886 *                    Copy left singular vectors of A from A to U
01887 *
01888                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
01889 *
01890                   ELSE
01891 *
01892 *                    Insufficient workspace for a fast algorithm
01893 *
01894                      ITAU = 1
01895                      IWORK = ITAU + N
01896 *
01897 *                    Compute A=Q*R, copying result to U
01898 *                    (Workspace: need 2*N, prefer N+N*NB)
01899 *
01900                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01901      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01902                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01903 *
01904 *                    Generate Q in U
01905 *                    (Workspace: need N+M, prefer N+M*NB)
01906 *
01907                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01908      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01909 *
01910 *                    Copy R from A to VT, zeroing out below it
01911 *
01912                      CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
01913                      IF( N.GT.1 )
01914      $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01915      $                               VT( 2, 1 ), LDVT )
01916                      IE = ITAU
01917                      ITAUQ = IE + N
01918                      ITAUP = ITAUQ + N
01919                      IWORK = ITAUP + N
01920 *
01921 *                    Bidiagonalize R in VT
01922 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01923 *
01924                      CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
01925      $                            WORK( ITAUQ ), WORK( ITAUP ),
01926      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01927 *
01928 *                    Multiply Q in U by left bidiagonalizing vectors
01929 *                    in VT
01930 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01931 *
01932                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
01933      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01934      $                            LWORK-IWORK+1, IERR )
01935 *
01936 *                    Generate right bidiagonalizing vectors in VT
01937 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01938 *
01939                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01940      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01941                      IWORK = IE + N
01942 *
01943 *                    Perform bidiagonal QR iteration, computing left
01944 *                    singular vectors of A in U and computing right
01945 *                    singular vectors of A in VT
01946 *                    (Workspace: need BDSPAC)
01947 *
01948                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
01949      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
01950      $                            INFO )
01951 *
01952                   END IF
01953 *
01954                END IF
01955 *
01956             END IF
01957 *
01958          ELSE
01959 *
01960 *           M .LT. MNTHR
01961 *
01962 *           Path 10 (M at least N, but not much larger)
01963 *           Reduce to bidiagonal form without QR decomposition
01964 *
01965             IE = 1
01966             ITAUQ = IE + N
01967             ITAUP = ITAUQ + N
01968             IWORK = ITAUP + N
01969 *
01970 *           Bidiagonalize A
01971 *           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
01972 *
01973             CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
01974      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
01975      $                   IERR )
01976             IF( WNTUAS ) THEN
01977 *
01978 *              If left singular vectors desired in U, copy result to U
01979 *              and generate left bidiagonalizing vectors in U
01980 *              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
01981 *
01982                CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01983                IF( WNTUS )
01984      $            NCU = N
01985                IF( WNTUA )
01986      $            NCU = M
01987                CALL SORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
01988      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
01989             END IF
01990             IF( WNTVAS ) THEN
01991 *
01992 *              If right singular vectors desired in VT, copy result to
01993 *              VT and generate right bidiagonalizing vectors in VT
01994 *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01995 *
01996                CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
01997                CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01998      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
01999             END IF
02000             IF( WNTUO ) THEN
02001 *
02002 *              If left singular vectors desired in A, generate left
02003 *              bidiagonalizing vectors in A
02004 *              (Workspace: need 4*N, prefer 3*N+N*NB)
02005 *
02006                CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
02007      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
02008             END IF
02009             IF( WNTVO ) THEN
02010 *
02011 *              If right singular vectors desired in A, generate right
02012 *              bidiagonalizing vectors in A
02013 *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
02014 *
02015                CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
02016      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
02017             END IF
02018             IWORK = IE + N
02019             IF( WNTUAS .OR. WNTUO )
02020      $         NRU = M
02021             IF( WNTUN )
02022      $         NRU = 0
02023             IF( WNTVAS .OR. WNTVO )
02024      $         NCVT = N
02025             IF( WNTVN )
02026      $         NCVT = 0
02027             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
02028 *
02029 *              Perform bidiagonal QR iteration, if desired, computing
02030 *              left singular vectors in U and computing right singular
02031 *              vectors in VT
02032 *              (Workspace: need BDSPAC)
02033 *
02034                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
02035      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
02036             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
02037 *
02038 *              Perform bidiagonal QR iteration, if desired, computing
02039 *              left singular vectors in U and computing right singular
02040 *              vectors in A
02041 *              (Workspace: need BDSPAC)
02042 *
02043                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
02044      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
02045             ELSE
02046 *
02047 *              Perform bidiagonal QR iteration, if desired, computing
02048 *              left singular vectors in A and computing right singular
02049 *              vectors in VT
02050 *              (Workspace: need BDSPAC)
02051 *
02052                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
02053      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
02054             END IF
02055 *
02056          END IF
02057 *
02058       ELSE
02059 *
02060 *        A has more columns than rows. If A has sufficiently more
02061 *        columns than rows, first reduce using the LQ decomposition (if
02062 *        sufficient workspace available)
02063 *
02064          IF( N.GE.MNTHR ) THEN
02065 *
02066             IF( WNTVN ) THEN
02067 *
02068 *              Path 1t(N much larger than M, JOBVT='N')
02069 *              No right singular vectors to be computed
02070 *
02071                ITAU = 1
02072                IWORK = ITAU + M
02073 *
02074 *              Compute A=L*Q
02075 *              (Workspace: need 2*M, prefer M+M*NB)
02076 *
02077                CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
02078      $                      LWORK-IWORK+1, IERR )
02079 *
02080 *              Zero out above L
02081 *
02082                CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
02083                IE = 1
02084                ITAUQ = IE + M
02085                ITAUP = ITAUQ + M
02086                IWORK = ITAUP + M
02087 *
02088 *              Bidiagonalize L in A
02089 *              (Workspace: need 4*M, prefer 3*M+2*M*NB)
02090 *
02091                CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
02092      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
02093      $                      IERR )
02094                IF( WNTUO .OR. WNTUAS ) THEN
02095 *
02096 *                 If left singular vectors desired, generate Q
02097 *                 (Workspace: need 4*M, prefer 3*M+M*NB)
02098 *
02099                   CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
02100      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02101                END IF
02102                IWORK = IE + M
02103                NRU = 0
02104                IF( WNTUO .OR. WNTUAS )
02105      $            NRU = M
02106 *
02107 *              Perform bidiagonal QR iteration, computing left singular
02108 *              vectors of A in A if desired
02109 *              (Workspace: need BDSPAC)
02110 *
02111                CALL SBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
02112      $                      LDA, DUM, 1, WORK( IWORK ), INFO )
02113 *
02114 *              If left singular vectors desired in U, copy them there
02115 *
02116                IF( WNTUAS )
02117      $            CALL SLACPY( 'F', M, M, A, LDA, U, LDU )
02118 *
02119             ELSE IF( WNTVO .AND. WNTUN ) THEN
02120 *
02121 *              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
02122 *              M right singular vectors to be overwritten on A and
02123 *              no left singular vectors to be computed
02124 *
02125                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
02126 *
02127 *                 Sufficient workspace for a fast algorithm
02128 *
02129                   IR = 1
02130                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
02131 *
02132 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
02133 *
02134                      LDWRKU = LDA
02135                      CHUNK = N
02136                      LDWRKR = LDA
02137                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
02138 *
02139 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
02140 *
02141                      LDWRKU = LDA
02142                      CHUNK = N
02143                      LDWRKR = M
02144                   ELSE
02145 *
02146 *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
02147 *
02148                      LDWRKU = M
02149                      CHUNK = ( LWORK-M*M-M ) / M
02150                      LDWRKR = M
02151                   END IF
02152                   ITAU = IR + LDWRKR*M
02153                   IWORK = ITAU + M
02154 *
02155 *                 Compute A=L*Q
02156 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02157 *
02158                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02159      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02160 *
02161 *                 Copy L to WORK(IR) and zero out above it
02162 *
02163                   CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
02164                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02165      $                         WORK( IR+LDWRKR ), LDWRKR )
02166 *
02167 *                 Generate Q in A
02168 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02169 *
02170                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02171      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02172                   IE = ITAU
02173                   ITAUQ = IE + M
02174                   ITAUP = ITAUQ + M
02175                   IWORK = ITAUP + M
02176 *
02177 *                 Bidiagonalize L in WORK(IR)
02178 *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02179 *
02180                   CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
02181      $                         WORK( ITAUQ ), WORK( ITAUP ),
02182      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02183 *
02184 *                 Generate right vectors bidiagonalizing L
02185 *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
02186 *
02187                   CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
02188      $                         WORK( ITAUP ), WORK( IWORK ),
02189      $                         LWORK-IWORK+1, IERR )
02190                   IWORK = IE + M
02191 *
02192 *                 Perform bidiagonal QR iteration, computing right
02193 *                 singular vectors of L in WORK(IR)
02194 *                 (Workspace: need M*M+BDSPAC)
02195 *
02196                   CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
02197      $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
02198      $                         WORK( IWORK ), INFO )
02199                   IU = IE + M
02200 *
02201 *                 Multiply right singular vectors of L in WORK(IR) by Q
02202 *                 in A, storing result in WORK(IU) and copying to A
02203 *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
02204 *
02205                   DO 30 I = 1, N, CHUNK
02206                      BLK = MIN( N-I+1, CHUNK )
02207                      CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
02208      $                           LDWRKR, A( 1, I ), LDA, ZERO,
02209      $                           WORK( IU ), LDWRKU )
02210                      CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
02211      $                            A( 1, I ), LDA )
02212    30             CONTINUE
02213 *
02214                ELSE
02215 *
02216 *                 Insufficient workspace for a fast algorithm
02217 *
02218                   IE = 1
02219                   ITAUQ = IE + M
02220                   ITAUP = ITAUQ + M
02221                   IWORK = ITAUP + M
02222 *
02223 *                 Bidiagonalize A
02224 *                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
02225 *
02226                   CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
02227      $                         WORK( ITAUQ ), WORK( ITAUP ),
02228      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02229 *
02230 *                 Generate right vectors bidiagonalizing A
02231 *                 (Workspace: need 4*M, prefer 3*M+M*NB)
02232 *
02233                   CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
02234      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02235                   IWORK = IE + M
02236 *
02237 *                 Perform bidiagonal QR iteration, computing right
02238 *                 singular vectors of A in A
02239 *                 (Workspace: need BDSPAC)
02240 *
02241                   CALL SBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
02242      $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
02243 *
02244                END IF
02245 *
02246             ELSE IF( WNTVO .AND. WNTUAS ) THEN
02247 *
02248 *              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
02249 *              M right singular vectors to be overwritten on A and
02250 *              M left singular vectors to be computed in U
02251 *
02252                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
02253 *
02254 *                 Sufficient workspace for a fast algorithm
02255 *
02256                   IR = 1
02257                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
02258 *
02259 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
02260 *
02261                      LDWRKU = LDA
02262                      CHUNK = N
02263                      LDWRKR = LDA
02264                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
02265 *
02266 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
02267 *
02268                      LDWRKU = LDA
02269                      CHUNK = N
02270                      LDWRKR = M
02271                   ELSE
02272 *
02273 *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
02274 *
02275                      LDWRKU = M
02276                      CHUNK = ( LWORK-M*M-M ) / M
02277                      LDWRKR = M
02278                   END IF
02279                   ITAU = IR + LDWRKR*M
02280                   IWORK = ITAU + M
02281 *
02282 *                 Compute A=L*Q
02283 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02284 *
02285                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02286      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02287 *
02288 *                 Copy L to U, zeroing about above it
02289 *
02290                   CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
02291                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
02292      $                         LDU )
02293 *
02294 *                 Generate Q in A
02295 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02296 *
02297                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02298      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02299                   IE = ITAU
02300                   ITAUQ = IE + M
02301                   ITAUP = ITAUQ + M
02302                   IWORK = ITAUP + M
02303 *
02304 *                 Bidiagonalize L in U, copying result to WORK(IR)
02305 *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02306 *
02307                   CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
02308      $                         WORK( ITAUQ ), WORK( ITAUP ),
02309      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02310                   CALL SLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
02311 *
02312 *                 Generate right vectors bidiagonalizing L in WORK(IR)
02313 *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
02314 *
02315                   CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
02316      $                         WORK( ITAUP ), WORK( IWORK ),
02317      $                         LWORK-IWORK+1, IERR )
02318 *
02319 *                 Generate left vectors bidiagonalizing L in U
02320 *                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
02321 *
02322                   CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
02323      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02324                   IWORK = IE + M
02325 *
02326 *                 Perform bidiagonal QR iteration, computing left
02327 *                 singular vectors of L in U, and computing right
02328 *                 singular vectors of L in WORK(IR)
02329 *                 (Workspace: need M*M+BDSPAC)
02330 *
02331                   CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
02332      $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
02333      $                         WORK( IWORK ), INFO )
02334                   IU = IE + M
02335 *
02336 *                 Multiply right singular vectors of L in WORK(IR) by Q
02337 *                 in A, storing result in WORK(IU) and copying to A
02338 *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
02339 *
02340                   DO 40 I = 1, N, CHUNK
02341                      BLK = MIN( N-I+1, CHUNK )
02342                      CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
02343      $                           LDWRKR, A( 1, I ), LDA, ZERO,
02344      $                           WORK( IU ), LDWRKU )
02345                      CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
02346      $                            A( 1, I ), LDA )
02347    40             CONTINUE
02348 *
02349                ELSE
02350 *
02351 *                 Insufficient workspace for a fast algorithm
02352 *
02353                   ITAU = 1
02354                   IWORK = ITAU + M
02355 *
02356 *                 Compute A=L*Q
02357 *                 (Workspace: need 2*M, prefer M+M*NB)
02358 *
02359                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02360      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02361 *
02362 *                 Copy L to U, zeroing out above it
02363 *
02364                   CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
02365                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
02366      $                         LDU )
02367 *
02368 *                 Generate Q in A
02369 *                 (Workspace: need 2*M, prefer M+M*NB)
02370 *
02371                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02372      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02373                   IE = ITAU
02374                   ITAUQ = IE + M
02375                   ITAUP = ITAUQ + M
02376                   IWORK = ITAUP + M
02377 *
02378 *                 Bidiagonalize L in U
02379 *                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
02380 *
02381                   CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
02382      $                         WORK( ITAUQ ), WORK( ITAUP ),
02383      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02384 *
02385 *                 Multiply right vectors bidiagonalizing L by Q in A
02386 *                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
02387 *
02388                   CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
02389      $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
02390      $                         LWORK-IWORK+1, IERR )
02391 *
02392 *                 Generate left vectors bidiagonalizing L in U
02393 *                 (Workspace: need 4*M, prefer 3*M+M*NB)
02394 *
02395                   CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
02396      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02397                   IWORK = IE + M
02398 *
02399 *                 Perform bidiagonal QR iteration, computing left
02400 *                 singular vectors of A in U and computing right
02401 *                 singular vectors of A in A
02402 *                 (Workspace: need BDSPAC)
02403 *
02404                   CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
02405      $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
02406 *
02407                END IF
02408 *
02409             ELSE IF( WNTVS ) THEN
02410 *
02411                IF( WNTUN ) THEN
02412 *
02413 *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
02414 *                 M right singular vectors to be computed in VT and
02415 *                 no left singular vectors to be computed
02416 *
02417                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
02418 *
02419 *                    Sufficient workspace for a fast algorithm
02420 *
02421                      IR = 1
02422                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
02423 *
02424 *                       WORK(IR) is LDA by M
02425 *
02426                         LDWRKR = LDA
02427                      ELSE
02428 *
02429 *                       WORK(IR) is M by M
02430 *
02431                         LDWRKR = M
02432                      END IF
02433                      ITAU = IR + LDWRKR*M
02434                      IWORK = ITAU + M
02435 *
02436 *                    Compute A=L*Q
02437 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02438 *
02439                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02440      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02441 *
02442 *                    Copy L to WORK(IR), zeroing out above it
02443 *
02444                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
02445      $                            LDWRKR )
02446                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02447      $                            WORK( IR+LDWRKR ), LDWRKR )
02448 *
02449 *                    Generate Q in A
02450 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02451 *
02452                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02453      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02454                      IE = ITAU
02455                      ITAUQ = IE + M
02456                      ITAUP = ITAUQ + M
02457                      IWORK = ITAUP + M
02458 *
02459 *                    Bidiagonalize L in WORK(IR)
02460 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02461 *
02462                      CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
02463      $                            WORK( IE ), WORK( ITAUQ ),
02464      $                            WORK( ITAUP ), WORK( IWORK ),
02465      $                            LWORK-IWORK+1, IERR )
02466 *
02467 *                    Generate right vectors bidiagonalizing L in
02468 *                    WORK(IR)
02469 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
02470 *
02471                      CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
02472      $                            WORK( ITAUP ), WORK( IWORK ),
02473      $                            LWORK-IWORK+1, IERR )
02474                      IWORK = IE + M
02475 *
02476 *                    Perform bidiagonal QR iteration, computing right
02477 *                    singular vectors of L in WORK(IR)
02478 *                    (Workspace: need M*M+BDSPAC)
02479 *
02480                      CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
02481      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
02482      $                            WORK( IWORK ), INFO )
02483 *
02484 *                    Multiply right singular vectors of L in WORK(IR) by
02485 *                    Q in A, storing result in VT
02486 *                    (Workspace: need M*M)
02487 *
02488                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
02489      $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
02490 *
02491                   ELSE
02492 *
02493 *                    Insufficient workspace for a fast algorithm
02494 *
02495                      ITAU = 1
02496                      IWORK = ITAU + M
02497 *
02498 *                    Compute A=L*Q
02499 *                    (Workspace: need 2*M, prefer M+M*NB)
02500 *
02501                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02502      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02503 *
02504 *                    Copy result to VT
02505 *
02506                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02507 *
02508 *                    Generate Q in VT
02509 *                    (Workspace: need 2*M, prefer M+M*NB)
02510 *
02511                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
02512      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02513                      IE = ITAU
02514                      ITAUQ = IE + M
02515                      ITAUP = ITAUQ + M
02516                      IWORK = ITAUP + M
02517 *
02518 *                    Zero out above L in A
02519 *
02520                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
02521      $                            LDA )
02522 *
02523 *                    Bidiagonalize L in A
02524 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
02525 *
02526                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
02527      $                            WORK( ITAUQ ), WORK( ITAUP ),
02528      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02529 *
02530 *                    Multiply right vectors bidiagonalizing L by Q in VT
02531 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
02532 *
02533                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
02534      $                            WORK( ITAUP ), VT, LDVT,
02535      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02536                      IWORK = IE + M
02537 *
02538 *                    Perform bidiagonal QR iteration, computing right
02539 *                    singular vectors of A in VT
02540 *                    (Workspace: need BDSPAC)
02541 *
02542                      CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
02543      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
02544      $                            INFO )
02545 *
02546                   END IF
02547 *
02548                ELSE IF( WNTUO ) THEN
02549 *
02550 *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
02551 *                 M right singular vectors to be computed in VT and
02552 *                 M left singular vectors to be overwritten on A
02553 *
02554                   IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
02555 *
02556 *                    Sufficient workspace for a fast algorithm
02557 *
02558                      IU = 1
02559                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
02560 *
02561 *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
02562 *
02563                         LDWRKU = LDA
02564                         IR = IU + LDWRKU*M
02565                         LDWRKR = LDA
02566                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
02567 *
02568 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
02569 *
02570                         LDWRKU = LDA
02571                         IR = IU + LDWRKU*M
02572                         LDWRKR = M
02573                      ELSE
02574 *
02575 *                       WORK(IU) is M by M and WORK(IR) is M by M
02576 *
02577                         LDWRKU = M
02578                         IR = IU + LDWRKU*M
02579                         LDWRKR = M
02580                      END IF
02581                      ITAU = IR + LDWRKR*M
02582                      IWORK = ITAU + M
02583 *
02584 *                    Compute A=L*Q
02585 *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
02586 *
02587                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02588      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02589 *
02590 *                    Copy L to WORK(IU), zeroing out below it
02591 *
02592                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
02593      $                            LDWRKU )
02594                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02595      $                            WORK( IU+LDWRKU ), LDWRKU )
02596 *
02597 *                    Generate Q in A
02598 *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
02599 *
02600                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02601      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02602                      IE = ITAU
02603                      ITAUQ = IE + M
02604                      ITAUP = ITAUQ + M
02605                      IWORK = ITAUP + M
02606 *
02607 *                    Bidiagonalize L in WORK(IU), copying result to
02608 *                    WORK(IR)
02609 *                    (Workspace: need 2*M*M+4*M,
02610 *                                prefer 2*M*M+3*M+2*M*NB)
02611 *
02612                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
02613      $                            WORK( IE ), WORK( ITAUQ ),
02614      $                            WORK( ITAUP ), WORK( IWORK ),
02615      $                            LWORK-IWORK+1, IERR )
02616                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
02617      $                            WORK( IR ), LDWRKR )
02618 *
02619 *                    Generate right bidiagonalizing vectors in WORK(IU)
02620 *                    (Workspace: need 2*M*M+4*M-1,
02621 *                                prefer 2*M*M+3*M+(M-1)*NB)
02622 *
02623                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
02624      $                            WORK( ITAUP ), WORK( IWORK ),
02625      $                            LWORK-IWORK+1, IERR )
02626 *
02627 *                    Generate left bidiagonalizing vectors in WORK(IR)
02628 *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
02629 *
02630                      CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
02631      $                            WORK( ITAUQ ), WORK( IWORK ),
02632      $                            LWORK-IWORK+1, IERR )
02633                      IWORK = IE + M
02634 *
02635 *                    Perform bidiagonal QR iteration, computing left
02636 *                    singular vectors of L in WORK(IR) and computing
02637 *                    right singular vectors of L in WORK(IU)
02638 *                    (Workspace: need 2*M*M+BDSPAC)
02639 *
02640                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
02641      $                            WORK( IU ), LDWRKU, WORK( IR ),
02642      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
02643 *
02644 *                    Multiply right singular vectors of L in WORK(IU) by
02645 *                    Q in A, storing result in VT
02646 *                    (Workspace: need M*M)
02647 *
02648                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
02649      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
02650 *
02651 *                    Copy left singular vectors of L to A
02652 *                    (Workspace: need M*M)
02653 *
02654                      CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
02655      $                            LDA )
02656 *
02657                   ELSE
02658 *
02659 *                    Insufficient workspace for a fast algorithm
02660 *
02661                      ITAU = 1
02662                      IWORK = ITAU + M
02663 *
02664 *                    Compute A=L*Q, copying result to VT
02665 *                    (Workspace: need 2*M, prefer M+M*NB)
02666 *
02667                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02668      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02669                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02670 *
02671 *                    Generate Q in VT
02672 *                    (Workspace: need 2*M, prefer M+M*NB)
02673 *
02674                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
02675      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02676                      IE = ITAU
02677                      ITAUQ = IE + M
02678                      ITAUP = ITAUQ + M
02679                      IWORK = ITAUP + M
02680 *
02681 *                    Zero out above L in A
02682 *
02683                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
02684      $                            LDA )
02685 *
02686 *                    Bidiagonalize L in A
02687 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
02688 *
02689                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
02690      $                            WORK( ITAUQ ), WORK( ITAUP ),
02691      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02692 *
02693 *                    Multiply right vectors bidiagonalizing L by Q in VT
02694 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
02695 *
02696                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
02697      $                            WORK( ITAUP ), VT, LDVT,
02698      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02699 *
02700 *                    Generate left bidiagonalizing vectors of L in A
02701 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
02702 *
02703                      CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
02704      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02705                      IWORK = IE + M
02706 *
02707 *                    Perform bidiagonal QR iteration, compute left
02708 *                    singular vectors of A in A and compute right
02709 *                    singular vectors of A in VT
02710 *                    (Workspace: need BDSPAC)
02711 *
02712                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
02713      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
02714      $                            INFO )
02715 *
02716                   END IF
02717 *
02718                ELSE IF( WNTUAS ) THEN
02719 *
02720 *                 Path 6t(N much larger than M, JOBU='S' or 'A',
02721 *                         JOBVT='S')
02722 *                 M right singular vectors to be computed in VT and
02723 *                 M left singular vectors to be computed in U
02724 *
02725                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
02726 *
02727 *                    Sufficient workspace for a fast algorithm
02728 *
02729                      IU = 1
02730                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
02731 *
02732 *                       WORK(IU) is LDA by N
02733 *
02734                         LDWRKU = LDA
02735                      ELSE
02736 *
02737 *                       WORK(IU) is LDA by M
02738 *
02739                         LDWRKU = M
02740                      END IF
02741                      ITAU = IU + LDWRKU*M
02742                      IWORK = ITAU + M
02743 *
02744 *                    Compute A=L*Q
02745 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02746 *
02747                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02748      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02749 *
02750 *                    Copy L to WORK(IU), zeroing out above it
02751 *
02752                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
02753      $                            LDWRKU )
02754                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02755      $                            WORK( IU+LDWRKU ), LDWRKU )
02756 *
02757 *                    Generate Q in A
02758 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02759 *
02760                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02761      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02762                      IE = ITAU
02763                      ITAUQ = IE + M
02764                      ITAUP = ITAUQ + M
02765                      IWORK = ITAUP + M
02766 *
02767 *                    Bidiagonalize L in WORK(IU), copying result to U
02768 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02769 *
02770                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
02771      $                            WORK( IE ), WORK( ITAUQ ),
02772      $                            WORK( ITAUP ), WORK( IWORK ),
02773      $                            LWORK-IWORK+1, IERR )
02774                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
02775      $                            LDU )
02776 *
02777 *                    Generate right bidiagonalizing vectors in WORK(IU)
02778 *                    (Workspace: need M*M+4*M-1,
02779 *                                prefer M*M+3*M+(M-1)*NB)
02780 *
02781                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
02782      $                            WORK( ITAUP ), WORK( IWORK ),
02783      $                            LWORK-IWORK+1, IERR )
02784 *
02785 *                    Generate left bidiagonalizing vectors in U
02786 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
02787 *
02788                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
02789      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02790                      IWORK = IE + M
02791 *
02792 *                    Perform bidiagonal QR iteration, computing left
02793 *                    singular vectors of L in U and computing right
02794 *                    singular vectors of L in WORK(IU)
02795 *                    (Workspace: need M*M+BDSPAC)
02796 *
02797                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
02798      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
02799      $                            WORK( IWORK ), INFO )
02800 *
02801 *                    Multiply right singular vectors of L in WORK(IU) by
02802 *                    Q in A, storing result in VT
02803 *                    (Workspace: need M*M)
02804 *
02805                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
02806      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
02807 *
02808                   ELSE
02809 *
02810 *                    Insufficient workspace for a fast algorithm
02811 *
02812                      ITAU = 1
02813                      IWORK = ITAU + M
02814 *
02815 *                    Compute A=L*Q, copying result to VT
02816 *                    (Workspace: need 2*M, prefer M+M*NB)
02817 *
02818                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02819      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02820                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02821 *
02822 *                    Generate Q in VT
02823 *                    (Workspace: need 2*M, prefer M+M*NB)
02824 *
02825                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
02826      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02827 *
02828 *                    Copy L to U, zeroing out above it
02829 *
02830                      CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
02831                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
02832      $                            LDU )
02833                      IE = ITAU
02834                      ITAUQ = IE + M
02835                      ITAUP = ITAUQ + M
02836                      IWORK = ITAUP + M
02837 *
02838 *                    Bidiagonalize L in U
02839 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
02840 *
02841                      CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
02842      $                            WORK( ITAUQ ), WORK( ITAUP ),
02843      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02844 *
02845 *                    Multiply right bidiagonalizing vectors in U by Q
02846 *                    in VT
02847 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
02848 *
02849                      CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
02850      $                            WORK( ITAUP ), VT, LDVT,
02851      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02852 *
02853 *                    Generate left bidiagonalizing vectors in U
02854 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
02855 *
02856                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
02857      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02858                      IWORK = IE + M
02859 *
02860 *                    Perform bidiagonal QR iteration, computing left
02861 *                    singular vectors of A in U and computing right
02862 *                    singular vectors of A in VT
02863 *                    (Workspace: need BDSPAC)
02864 *
02865                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
02866      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
02867      $                            INFO )
02868 *
02869                   END IF
02870 *
02871                END IF
02872 *
02873             ELSE IF( WNTVA ) THEN
02874 *
02875                IF( WNTUN ) THEN
02876 *
02877 *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
02878 *                 N right singular vectors to be computed in VT and
02879 *                 no left singular vectors to be computed
02880 *
02881                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
02882 *
02883 *                    Sufficient workspace for a fast algorithm
02884 *
02885                      IR = 1
02886                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
02887 *
02888 *                       WORK(IR) is LDA by M
02889 *
02890                         LDWRKR = LDA
02891                      ELSE
02892 *
02893 *                       WORK(IR) is M by M
02894 *
02895                         LDWRKR = M
02896                      END IF
02897                      ITAU = IR + LDWRKR*M
02898                      IWORK = ITAU + M
02899 *
02900 *                    Compute A=L*Q, copying result to VT
02901 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02902 *
02903                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02904      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02905                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02906 *
02907 *                    Copy L to WORK(IR), zeroing out above it
02908 *
02909                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
02910      $                            LDWRKR )
02911                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02912      $                            WORK( IR+LDWRKR ), LDWRKR )
02913 *
02914 *                    Generate Q in VT
02915 *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
02916 *
02917                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
02918      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02919                      IE = ITAU
02920                      ITAUQ = IE + M
02921                      ITAUP = ITAUQ + M
02922                      IWORK = ITAUP + M
02923 *
02924 *                    Bidiagonalize L in WORK(IR)
02925 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02926 *
02927                      CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
02928      $                            WORK( IE ), WORK( ITAUQ ),
02929      $                            WORK( ITAUP ), WORK( IWORK ),
02930      $                            LWORK-IWORK+1, IERR )
02931 *
02932 *                    Generate right bidiagonalizing vectors in WORK(IR)
02933 *                    (Workspace: need M*M+4*M-1,
02934 *                                prefer M*M+3*M+(M-1)*NB)
02935 *
02936                      CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
02937      $                            WORK( ITAUP ), WORK( IWORK ),
02938      $                            LWORK-IWORK+1, IERR )
02939                      IWORK = IE + M
02940 *
02941 *                    Perform bidiagonal QR iteration, computing right
02942 *                    singular vectors of L in WORK(IR)
02943 *                    (Workspace: need M*M+BDSPAC)
02944 *
02945                      CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
02946      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
02947      $                            WORK( IWORK ), INFO )
02948 *
02949 *                    Multiply right singular vectors of L in WORK(IR) by
02950 *                    Q in VT, storing result in A
02951 *                    (Workspace: need M*M)
02952 *
02953                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
02954      $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
02955 *
02956 *                    Copy right singular vectors of A from A to VT
02957 *
02958                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
02959 *
02960                   ELSE
02961 *
02962 *                    Insufficient workspace for a fast algorithm
02963 *
02964                      ITAU = 1
02965                      IWORK = ITAU + M
02966 *
02967 *                    Compute A=L*Q, copying result to VT
02968 *                    (Workspace: need 2*M, prefer M+M*NB)
02969 *
02970                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02971      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02972                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02973 *
02974 *                    Generate Q in VT
02975 *                    (Workspace: need M+N, prefer M+N*NB)
02976 *
02977                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
02978      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02979                      IE = ITAU
02980                      ITAUQ = IE + M
02981                      ITAUP = ITAUQ + M
02982                      IWORK = ITAUP + M
02983 *
02984 *                    Zero out above L in A
02985 *
02986                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
02987      $                            LDA )
02988 *
02989 *                    Bidiagonalize L in A
02990 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
02991 *
02992                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
02993      $                            WORK( ITAUQ ), WORK( ITAUP ),
02994      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02995 *
02996 *                    Multiply right bidiagonalizing vectors in A by Q
02997 *                    in VT
02998 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
02999 *
03000                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
03001      $                            WORK( ITAUP ), VT, LDVT,
03002      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03003                      IWORK = IE + M
03004 *
03005 *                    Perform bidiagonal QR iteration, computing right
03006 *                    singular vectors of A in VT
03007 *                    (Workspace: need BDSPAC)
03008 *
03009                      CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
03010      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
03011      $                            INFO )
03012 *
03013                   END IF
03014 *
03015                ELSE IF( WNTUO ) THEN
03016 *
03017 *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
03018 *                 N right singular vectors to be computed in VT and
03019 *                 M left singular vectors to be overwritten on A
03020 *
03021                   IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
03022 *
03023 *                    Sufficient workspace for a fast algorithm
03024 *
03025                      IU = 1
03026                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
03027 *
03028 *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
03029 *
03030                         LDWRKU = LDA
03031                         IR = IU + LDWRKU*M
03032                         LDWRKR = LDA
03033                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
03034 *
03035 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
03036 *
03037                         LDWRKU = LDA
03038                         IR = IU + LDWRKU*M
03039                         LDWRKR = M
03040                      ELSE
03041 *
03042 *                       WORK(IU) is M by M and WORK(IR) is M by M
03043 *
03044                         LDWRKU = M
03045                         IR = IU + LDWRKU*M
03046                         LDWRKR = M
03047                      END IF
03048                      ITAU = IR + LDWRKR*M
03049                      IWORK = ITAU + M
03050 *
03051 *                    Compute A=L*Q, copying result to VT
03052 *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
03053 *
03054                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
03055      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03056                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
03057 *
03058 *                    Generate Q in VT
03059 *                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
03060 *
03061                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
03062      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03063 *
03064 *                    Copy L to WORK(IU), zeroing out above it
03065 *
03066                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
03067      $                            LDWRKU )
03068                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
03069      $                            WORK( IU+LDWRKU ), LDWRKU )
03070                      IE = ITAU
03071                      ITAUQ = IE + M
03072                      ITAUP = ITAUQ + M
03073                      IWORK = ITAUP + M
03074 *
03075 *                    Bidiagonalize L in WORK(IU), copying result to
03076 *                    WORK(IR)
03077 *                    (Workspace: need 2*M*M+4*M,
03078 *                                prefer 2*M*M+3*M+2*M*NB)
03079 *
03080                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
03081      $                            WORK( IE ), WORK( ITAUQ ),
03082      $                            WORK( ITAUP ), WORK( IWORK ),
03083      $                            LWORK-IWORK+1, IERR )
03084                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
03085      $                            WORK( IR ), LDWRKR )
03086 *
03087 *                    Generate right bidiagonalizing vectors in WORK(IU)
03088 *                    (Workspace: need 2*M*M+4*M-1,
03089 *                                prefer 2*M*M+3*M+(M-1)*NB)
03090 *
03091                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
03092      $                            WORK( ITAUP ), WORK( IWORK ),
03093      $                            LWORK-IWORK+1, IERR )
03094 *
03095 *                    Generate left bidiagonalizing vectors in WORK(IR)
03096 *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
03097 *
03098                      CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
03099      $                            WORK( ITAUQ ), WORK( IWORK ),
03100      $                            LWORK-IWORK+1, IERR )
03101                      IWORK = IE + M
03102 *
03103 *                    Perform bidiagonal QR iteration, computing left
03104 *                    singular vectors of L in WORK(IR) and computing
03105 *                    right singular vectors of L in WORK(IU)
03106 *                    (Workspace: need 2*M*M+BDSPAC)
03107 *
03108                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
03109      $                            WORK( IU ), LDWRKU, WORK( IR ),
03110      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
03111 *
03112 *                    Multiply right singular vectors of L in WORK(IU) by
03113 *                    Q in VT, storing result in A
03114 *                    (Workspace: need M*M)
03115 *
03116                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
03117      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
03118 *
03119 *                    Copy right singular vectors of A from A to VT
03120 *
03121                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
03122 *
03123 *                    Copy left singular vectors of A from WORK(IR) to A
03124 *
03125                      CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
03126      $                            LDA )
03127 *
03128                   ELSE
03129 *
03130 *                    Insufficient workspace for a fast algorithm
03131 *
03132                      ITAU = 1
03133                      IWORK = ITAU + M
03134 *
03135 *                    Compute A=L*Q, copying result to VT
03136 *                    (Workspace: need 2*M, prefer M+M*NB)
03137 *
03138                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
03139      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03140                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
03141 *
03142 *                    Generate Q in VT
03143 *                    (Workspace: need M+N, prefer M+N*NB)
03144 *
03145                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
03146      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03147                      IE = ITAU
03148                      ITAUQ = IE + M
03149                      ITAUP = ITAUQ + M
03150                      IWORK = ITAUP + M
03151 *
03152 *                    Zero out above L in A
03153 *
03154                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
03155      $                            LDA )
03156 *
03157 *                    Bidiagonalize L in A
03158 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
03159 *
03160                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
03161      $                            WORK( ITAUQ ), WORK( ITAUP ),
03162      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03163 *
03164 *                    Multiply right bidiagonalizing vectors in A by Q
03165 *                    in VT
03166 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
03167 *
03168                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
03169      $                            WORK( ITAUP ), VT, LDVT,
03170      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03171 *
03172 *                    Generate left bidiagonalizing vectors in A
03173 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
03174 *
03175                      CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
03176      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03177                      IWORK = IE + M
03178 *
03179 *                    Perform bidiagonal QR iteration, computing left
03180 *                    singular vectors of A in A and computing right
03181 *                    singular vectors of A in VT
03182 *                    (Workspace: need BDSPAC)
03183 *
03184                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
03185      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
03186      $                            INFO )
03187 *
03188                   END IF
03189 *
03190                ELSE IF( WNTUAS ) THEN
03191 *
03192 *                 Path 9t(N much larger than M, JOBU='S' or 'A',
03193 *                         JOBVT='A')
03194 *                 N right singular vectors to be computed in VT and
03195 *                 M left singular vectors to be computed in U
03196 *
03197                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
03198 *
03199 *                    Sufficient workspace for a fast algorithm
03200 *
03201                      IU = 1
03202                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
03203 *
03204 *                       WORK(IU) is LDA by M
03205 *
03206                         LDWRKU = LDA
03207                      ELSE
03208 *
03209 *                       WORK(IU) is M by M
03210 *
03211                         LDWRKU = M
03212                      END IF
03213                      ITAU = IU + LDWRKU*M
03214                      IWORK = ITAU + M
03215 *
03216 *                    Compute A=L*Q, copying result to VT
03217 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
03218 *
03219                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
03220      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03221                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
03222 *
03223 *                    Generate Q in VT
03224 *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
03225 *
03226                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
03227      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03228 *
03229 *                    Copy L to WORK(IU), zeroing out above it
03230 *
03231                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
03232      $                            LDWRKU )
03233                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
03234      $                            WORK( IU+LDWRKU ), LDWRKU )
03235                      IE = ITAU
03236                      ITAUQ = IE + M
03237                      ITAUP = ITAUQ + M
03238                      IWORK = ITAUP + M
03239 *
03240 *                    Bidiagonalize L in WORK(IU), copying result to U
03241 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
03242 *
03243                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
03244      $                            WORK( IE ), WORK( ITAUQ ),
03245      $                            WORK( ITAUP ), WORK( IWORK ),
03246      $                            LWORK-IWORK+1, IERR )
03247                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
03248      $                            LDU )
03249 *
03250 *                    Generate right bidiagonalizing vectors in WORK(IU)
03251 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
03252 *
03253                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
03254      $                            WORK( ITAUP ), WORK( IWORK ),
03255      $                            LWORK-IWORK+1, IERR )
03256 *
03257 *                    Generate left bidiagonalizing vectors in U
03258 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
03259 *
03260                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
03261      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03262                      IWORK = IE + M
03263 *
03264 *                    Perform bidiagonal QR iteration, computing left
03265 *                    singular vectors of L in U and computing right
03266 *                    singular vectors of L in WORK(IU)
03267 *                    (Workspace: need M*M+BDSPAC)
03268 *
03269                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
03270      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
03271      $                            WORK( IWORK ), INFO )
03272 *
03273 *                    Multiply right singular vectors of L in WORK(IU) by
03274 *                    Q in VT, storing result in A
03275 *                    (Workspace: need M*M)
03276 *
03277                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
03278      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
03279 *
03280 *                    Copy right singular vectors of A from A to VT
03281 *
03282                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
03283 *
03284                   ELSE
03285 *
03286 *                    Insufficient workspace for a fast algorithm
03287 *
03288                      ITAU = 1
03289                      IWORK = ITAU + M
03290 *
03291 *                    Compute A=L*Q, copying result to VT
03292 *                    (Workspace: need 2*M, prefer M+M*NB)
03293 *
03294                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
03295      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03296                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
03297 *
03298 *                    Generate Q in VT
03299 *                    (Workspace: need M+N, prefer M+N*NB)
03300 *
03301                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
03302      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03303 *
03304 *                    Copy L to U, zeroing out above it
03305 *
03306                      CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
03307                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
03308      $                            LDU )
03309                      IE = ITAU
03310                      ITAUQ = IE + M
03311                      ITAUP = ITAUQ + M
03312                      IWORK = ITAUP + M
03313 *
03314 *                    Bidiagonalize L in U
03315 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
03316 *
03317                      CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
03318      $                            WORK( ITAUQ ), WORK( ITAUP ),
03319      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03320 *
03321 *                    Multiply right bidiagonalizing vectors in U by Q
03322 *                    in VT
03323 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
03324 *
03325                      CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
03326      $                            WORK( ITAUP ), VT, LDVT,
03327      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03328 *
03329 *                    Generate left bidiagonalizing vectors in U
03330 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
03331 *
03332                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
03333      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03334                      IWORK = IE + M
03335 *
03336 *                    Perform bidiagonal QR iteration, computing left
03337 *                    singular vectors of A in U and computing right
03338 *                    singular vectors of A in VT
03339 *                    (Workspace: need BDSPAC)
03340 *
03341                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
03342      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
03343      $                            INFO )
03344 *
03345                   END IF
03346 *
03347                END IF
03348 *
03349             END IF
03350 *
03351          ELSE
03352 *
03353 *           N .LT. MNTHR
03354 *
03355 *           Path 10t(N greater than M, but not much larger)
03356 *           Reduce to bidiagonal form without LQ decomposition
03357 *
03358             IE = 1
03359             ITAUQ = IE + M
03360             ITAUP = ITAUQ + M
03361             IWORK = ITAUP + M
03362 *
03363 *           Bidiagonalize A
03364 *           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
03365 *
03366             CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
03367      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
03368      $                   IERR )
03369             IF( WNTUAS ) THEN
03370 *
03371 *              If left singular vectors desired in U, copy result to U
03372 *              and generate left bidiagonalizing vectors in U
03373 *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
03374 *
03375                CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
03376                CALL SORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
03377      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
03378             END IF
03379             IF( WNTVAS ) THEN
03380 *
03381 *              If right singular vectors desired in VT, copy result to
03382 *              VT and generate right bidiagonalizing vectors in VT
03383 *              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
03384 *
03385                CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
03386                IF( WNTVA )
03387      $            NRVT = N
03388                IF( WNTVS )
03389      $            NRVT = M
03390                CALL SORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
03391      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
03392             END IF
03393             IF( WNTUO ) THEN
03394 *
03395 *              If left singular vectors desired in A, generate left
03396 *              bidiagonalizing vectors in A
03397 *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
03398 *
03399                CALL SORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
03400      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
03401             END IF
03402             IF( WNTVO ) THEN
03403 *
03404 *              If right singular vectors desired in A, generate right
03405 *              bidiagonalizing vectors in A
03406 *              (Workspace: need 4*M, prefer 3*M+M*NB)
03407 *
03408                CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
03409      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
03410             END IF
03411             IWORK = IE + M
03412             IF( WNTUAS .OR. WNTUO )
03413      $         NRU = M
03414             IF( WNTUN )
03415      $         NRU = 0
03416             IF( WNTVAS .OR. WNTVO )
03417      $         NCVT = N
03418             IF( WNTVN )
03419      $         NCVT = 0
03420             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
03421 *
03422 *              Perform bidiagonal QR iteration, if desired, computing
03423 *              left singular vectors in U and computing right singular
03424 *              vectors in VT
03425 *              (Workspace: need BDSPAC)
03426 *
03427                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
03428      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
03429             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
03430 *
03431 *              Perform bidiagonal QR iteration, if desired, computing
03432 *              left singular vectors in U and computing right singular
03433 *              vectors in A
03434 *              (Workspace: need BDSPAC)
03435 *
03436                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
03437      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
03438             ELSE
03439 *
03440 *              Perform bidiagonal QR iteration, if desired, computing
03441 *              left singular vectors in A and computing right singular
03442 *              vectors in VT
03443 *              (Workspace: need BDSPAC)
03444 *
03445                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
03446      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
03447             END IF
03448 *
03449          END IF
03450 *
03451       END IF
03452 *
03453 *     If SBDSQR failed to converge, copy unconverged superdiagonals
03454 *     to WORK( 2:MINMN )
03455 *
03456       IF( INFO.NE.0 ) THEN
03457          IF( IE.GT.2 ) THEN
03458             DO 50 I = 1, MINMN - 1
03459                WORK( I+1 ) = WORK( I+IE-1 )
03460    50       CONTINUE
03461          END IF
03462          IF( IE.LT.2 ) THEN
03463             DO 60 I = MINMN - 1, 1, -1
03464                WORK( I+1 ) = WORK( I+IE-1 )
03465    60       CONTINUE
03466          END IF
03467       END IF
03468 *
03469 *     Undo scaling if necessary
03470 *
03471       IF( ISCL.EQ.1 ) THEN
03472          IF( ANRM.GT.BIGNUM )
03473      $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
03474      $                   IERR )
03475          IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
03476      $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
03477      $                   MINMN, IERR )
03478          IF( ANRM.LT.SMLNUM )
03479      $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
03480      $                   IERR )
03481          IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
03482      $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
03483      $                   MINMN, IERR )
03484       END IF
03485 *
03486 *     Return optimal workspace in WORK(1)
03487 *
03488       WORK( 1 ) = MAXWRK
03489 *
03490       RETURN
03491 *
03492 *     End of SGESVD
03493 *
03494       END
 All Files Functions