LAPACK  3.4.1
LAPACK: Linear Algebra PACKage
dgetrf.f
Go to the documentation of this file.
00001 C> \brief \b DGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm
00002 *
00003 *  =========== DOCUMENTATION ===========
00004 *
00005 * Online html documentation available at 
00006 *            http://www.netlib.org/lapack/explore-html/ 
00007 *
00008 *  Definition:
00009 *  ===========
00010 *
00011 *       SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
00012 * 
00013 *       .. Scalar Arguments ..
00014 *       INTEGER            INFO, LDA, M, N
00015 *       ..
00016 *       .. Array Arguments ..
00017 *       INTEGER            IPIV( * )
00018 *       DOUBLE PRECISION   A( LDA, * )
00019 *       ..
00020 *  
00021 *  Purpose
00022 *  =======
00023 *
00024 C>\details \b Purpose:
00025 C>\verbatim
00026 C>
00027 C> DGETRF computes an LU factorization of a general M-by-N matrix A
00028 C> using partial pivoting with row interchanges.
00029 C>
00030 C> The factorization has the form
00031 C>    A = P * L * U
00032 C> where P is a permutation matrix, L is lower triangular with unit
00033 C> diagonal elements (lower trapezoidal if m > n), and U is upper
00034 C> triangular (upper trapezoidal if m < n).
00035 C>
00036 C> This code implements an iterative version of Sivan Toledo's recursive
00037 C> LU algorithm[1].  For square matrices, this iterative versions should
00038 C> be within a factor of two of the optimum number of memory transfers.
00039 C>
00040 C> The pattern is as follows, with the large blocks of U being updated
00041 C> in one call to DTRSM, and the dotted lines denoting sections that
00042 C> have had all pending permutations applied:
00043 C>
00044 C>  1 2 3 4 5 6 7 8
00045 C> +-+-+---+-------+------
00046 C> | |1|   |       |
00047 C> |.+-+ 2 |       |
00048 C> | | |   |       |
00049 C> |.|.+-+-+   4   |
00050 C> | | | |1|       |
00051 C> | | |.+-+       |
00052 C> | | | | |       |
00053 C> |.|.|.|.+-+-+---+  8
00054 C> | | | | | |1|   |
00055 C> | | | | |.+-+ 2 |
00056 C> | | | | | | |   |
00057 C> | | | | |.|.+-+-+
00058 C> | | | | | | | |1|
00059 C> | | | | | | |.+-+
00060 C> | | | | | | | | |
00061 C> |.|.|.|.|.|.|.|.+-----
00062 C> | | | | | | | | |
00063 C>
00064 C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
00065 C> the binary expansion of the current column.  Each Schur update is
00066 C> applied as soon as the necessary portion of U is available.
00067 C>
00068 C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
00069 C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
00070 C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
00071 C>
00072 C>\endverbatim
00073 *
00074 *  Arguments:
00075 *  ==========
00076 *
00077 C> \param[in] M
00078 C> \verbatim
00079 C>          M is INTEGER
00080 C>          The number of rows of the matrix A.  M >= 0.
00081 C> \endverbatim
00082 C>
00083 C> \param[in] N
00084 C> \verbatim
00085 C>          N is INTEGER
00086 C>          The number of columns of the matrix A.  N >= 0.
00087 C> \endverbatim
00088 C>
00089 C> \param[in,out] A
00090 C> \verbatim
00091 C>          A is DOUBLE PRECISION array, dimension (LDA,N)
00092 C>          On entry, the M-by-N matrix to be factored.
00093 C>          On exit, the factors L and U from the factorization
00094 C>          A = P*L*U; the unit diagonal elements of L are not stored.
00095 C> \endverbatim
00096 C>
00097 C> \param[in] LDA
00098 C> \verbatim
00099 C>          LDA is INTEGER
00100 C>          The leading dimension of the array A.  LDA >= max(1,M).
00101 C> \endverbatim
00102 C>
00103 C> \param[out] IPIV
00104 C> \verbatim
00105 C>          IPIV is INTEGER array, dimension (min(M,N))
00106 C>          The pivot indices; for 1 <= i <= min(M,N), row i of the
00107 C>          matrix was interchanged with row IPIV(i).
00108 C> \endverbatim
00109 C>
00110 C> \param[out] INFO
00111 C> \verbatim
00112 C>          INFO is INTEGER
00113 C>          = 0:  successful exit
00114 C>          < 0:  if INFO = -i, the i-th argument had an illegal value
00115 C>          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
00116 C>                has been completed, but the factor U is exactly
00117 C>                singular, and division by zero will occur if it is used
00118 C>                to solve a system of equations.
00119 C> \endverbatim
00120 C>
00121 *
00122 *  Authors:
00123 *  ========
00124 *
00125 C> \author Univ. of Tennessee 
00126 C> \author Univ. of California Berkeley 
00127 C> \author Univ. of Colorado Denver 
00128 C> \author NAG Ltd. 
00129 *
00130 C> \date November 2011
00131 *
00132 C> \ingroup variantsGEcomputational
00133 *
00134 *  =====================================================================
00135       SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
00136 *
00137 *  -- LAPACK computational routine (version 3.X) --
00138 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
00139 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
00140 *     November 2011
00141 *
00142 *     .. Scalar Arguments ..
00143       INTEGER            INFO, LDA, M, N
00144 *     ..
00145 *     .. Array Arguments ..
00146       INTEGER            IPIV( * )
00147       DOUBLE PRECISION   A( LDA, * )
00148 *     ..
00149 *
00150 *  =====================================================================
00151 *
00152 *     .. Parameters ..
00153       DOUBLE PRECISION   ONE, ZERO, NEGONE
00154       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
00155       PARAMETER          ( NEGONE = -1.0D+0 )
00156 *     ..
00157 *     .. Local Scalars ..
00158       DOUBLE PRECISION   SFMIN, TMP
00159       INTEGER            I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
00160       INTEGER            KSTART, IPIVSTART, JPIVSTART, KCOLS
00161 *     ..
00162 *     .. External Functions ..
00163       DOUBLE PRECISION   DLAMCH
00164       INTEGER            IDAMAX
00165       LOGICAL            DISNAN
00166       EXTERNAL           DLAMCH, IDAMAX, DISNAN
00167 *     ..
00168 *     .. External Subroutines ..
00169       EXTERNAL           DTRSM, DSCAL, XERBLA, DLASWP
00170 *     ..
00171 *     .. Intrinsic Functions ..
00172       INTRINSIC          MAX, MIN, IAND
00173 *     ..
00174 *     .. Executable Statements ..
00175 *
00176 *     Test the input parameters.
00177 *
00178       INFO = 0
00179       IF( M.LT.0 ) THEN
00180          INFO = -1
00181       ELSE IF( N.LT.0 ) THEN
00182          INFO = -2
00183       ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
00184          INFO = -4
00185       END IF
00186       IF( INFO.NE.0 ) THEN
00187          CALL XERBLA( 'DGETRF', -INFO )
00188          RETURN
00189       END IF
00190 *
00191 *     Quick return if possible
00192 *
00193       IF( M.EQ.0 .OR. N.EQ.0 )
00194      $   RETURN
00195 *
00196 *     Compute machine safe minimum
00197 *
00198       SFMIN = DLAMCH( 'S' )
00199 *
00200       NSTEP = MIN( M, N )
00201       DO J = 1, NSTEP
00202          KAHEAD = IAND( J, -J )
00203          KSTART = J + 1 - KAHEAD
00204          KCOLS = MIN( KAHEAD, M-J )
00205 *
00206 *        Find pivot.
00207 *
00208          JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 )
00209          IPIV( J ) = JP
00210 
00211 *        Permute just this column.
00212          IF (JP .NE. J) THEN
00213             TMP = A( J, J )
00214             A( J, J ) = A( JP, J )
00215             A( JP, J ) = TMP
00216          END IF
00217 
00218 *        Apply pending permutations to L
00219          NTOPIV = 1
00220          IPIVSTART = J
00221          JPIVSTART = J - NTOPIV
00222          DO WHILE ( NTOPIV .LT. KAHEAD )
00223             CALL DLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
00224      $           IPIV, 1 )
00225             IPIVSTART = IPIVSTART - NTOPIV;
00226             NTOPIV = NTOPIV * 2;
00227             JPIVSTART = JPIVSTART - NTOPIV;
00228          END DO
00229 
00230 *        Permute U block to match L
00231          CALL DLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
00232 
00233 *        Factor the current column
00234          IF( A( J, J ).NE.ZERO .AND. .NOT.DISNAN( A( J, J ) ) ) THEN
00235                IF( ABS(A( J, J )) .GE. SFMIN ) THEN
00236                   CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
00237                ELSE
00238                  DO I = 1, M-J
00239                     A( J+I, J ) = A( J+I, J ) / A( J, J )
00240                  END DO
00241                END IF
00242          ELSE IF( A( J,J ) .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
00243             INFO = J
00244          END IF
00245 
00246 *        Solve for U block.
00247          CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
00248      $        KCOLS, ONE, A( KSTART, KSTART ), LDA,
00249      $        A( KSTART, J+1 ), LDA )
00250 *        Schur complement.
00251          CALL DGEMM( 'No transpose', 'No transpose', M-J,
00252      $        KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
00253      $        A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
00254       END DO
00255 
00256 *     Handle pivot permutations on the way out of the recursion
00257       NPIVED = IAND( NSTEP, -NSTEP )
00258       J = NSTEP - NPIVED
00259       DO WHILE ( J .GT. 0 )
00260          NTOPIV = IAND( J, -J )
00261          CALL DLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
00262      $        IPIV, 1 )
00263          J = J - NTOPIV
00264       END DO
00265 
00266 *     If short and wide, handle the rest of the columns.
00267       IF ( M .LT. N ) THEN
00268          CALL DLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
00269          CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
00270      $        N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
00271       END IF
00272 
00273       RETURN
00274 *
00275 *     End of DGETRF
00276 *
00277       END
 All Files Functions