// Copyright (C) 2008-2015 National ICT Australia (NICTA) // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. // ------------------------------------------------------------------- // // Written by Conrad Sanderson - http://conradsanderson.id.au // Written by Ryan Curtin //! \addtogroup op_strans //! @{ //! for tiny square matrices (size <= 4x4) template arma_hot inline void op_strans::apply_mat_noalias_tinysq(Mat& out, const TA& A) { const eT* Am = A.memptr(); eT* outm = out.memptr(); switch(A.n_rows) { case 1: { outm[0] = Am[0]; } break; case 2: { outm[pos::n2] = Am[pos::n2]; outm[pos::n2] = Am[pos::n2]; outm[pos::n2] = Am[pos::n2]; outm[pos::n2] = Am[pos::n2]; } break; case 3: { outm[pos::n3] = Am[pos::n3]; outm[pos::n3] = Am[pos::n3]; outm[pos::n3] = Am[pos::n3]; outm[pos::n3] = Am[pos::n3]; outm[pos::n3] = Am[pos::n3]; outm[pos::n3] = Am[pos::n3]; outm[pos::n3] = Am[pos::n3]; outm[pos::n3] = Am[pos::n3]; outm[pos::n3] = Am[pos::n3]; } break; case 4: { outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; outm[pos::n4] = Am[pos::n4]; } break; default: ; } } //! Immediate transpose of a dense matrix template arma_hot inline void op_strans::apply_mat_noalias(Mat& out, const TA& A) { arma_extra_debug_sigprint(); const uword A_n_cols = A.n_cols; const uword A_n_rows = A.n_rows; out.set_size(A_n_cols, A_n_rows); if( (TA::is_row) || (TA::is_col) || (A_n_cols == 1) || (A_n_rows == 1) ) { arrayops::copy( out.memptr(), A.memptr(), A.n_elem ); } else { if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) ) { op_strans::apply_mat_noalias_tinysq(out, A); } else { eT* outptr = out.memptr(); for(uword k=0; k < A_n_rows; ++k) { const eT* Aptr = &(A.at(k,0)); uword j; for(j=1; j < A_n_cols; j+=2) { const eT tmp_i = (*Aptr); Aptr += A_n_rows; const eT tmp_j = (*Aptr); Aptr += A_n_rows; (*outptr) = tmp_i; outptr++; (*outptr) = tmp_j; outptr++; } if((j-1) < A_n_cols) { (*outptr) = (*Aptr); outptr++;; } } } } } template arma_hot inline void op_strans::apply_mat_inplace(Mat& out) { arma_extra_debug_sigprint(); const uword n_rows = out.n_rows; const uword n_cols = out.n_cols; if(n_rows == n_cols) { arma_extra_debug_print("op_strans::apply(): doing in-place transpose of a square matrix"); const uword N = n_rows; for(uword k=0; k < N; ++k) { eT* colptr = &(out.at(k,k)); eT* rowptr = colptr; colptr++; rowptr += N; uword j; for(j=(k+2); j < N; j+=2) { std::swap( (*rowptr), (*colptr) ); rowptr += N; colptr++; std::swap( (*rowptr), (*colptr) ); rowptr += N; colptr++; } if((j-1) < N) { std::swap( (*rowptr), (*colptr) ); } } } else { Mat tmp; op_strans::apply_mat_noalias(tmp, out); out.steal_mem(tmp); } } template arma_hot inline void op_strans::apply_mat(Mat& out, const TA& A) { arma_extra_debug_sigprint(); if(&out != &A) { op_strans::apply_mat_noalias(out, A); } else { op_strans::apply_mat_inplace(out); } } template arma_hot inline void op_strans::apply_proxy(Mat& out, const T1& X) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; const Proxy P(X); // allow detection of in-place transpose if( (is_Mat::stored_type>::value == true) && (Proxy::fake_mat == false) ) { const unwrap::stored_type> tmp(P.Q); op_strans::apply_mat(out, tmp.M); } else { const uword n_rows = P.get_n_rows(); const uword n_cols = P.get_n_cols(); const bool is_alias = P.is_alias(out); if( (resolves_to_vector::value == true) && (Proxy::prefer_at_accessor == false) ) { if(is_alias == false) { out.set_size(n_cols, n_rows); eT* out_mem = out.memptr(); const uword n_elem = P.get_n_elem(); typename Proxy::ea_type Pea = P.get_ea(); uword i,j; for(i=0, j=1; j < n_elem; i+=2, j+=2) { const eT tmp_i = Pea[i]; const eT tmp_j = Pea[j]; out_mem[i] = tmp_i; out_mem[j] = tmp_j; } if(i < n_elem) { out_mem[i] = Pea[i]; } } else // aliasing { Mat out2(n_cols, n_rows); eT* out_mem = out2.memptr(); const uword n_elem = P.get_n_elem(); typename Proxy::ea_type Pea = P.get_ea(); uword i,j; for(i=0, j=1; j < n_elem; i+=2, j+=2) { const eT tmp_i = Pea[i]; const eT tmp_j = Pea[j]; out_mem[i] = tmp_i; out_mem[j] = tmp_j; } if(i < n_elem) { out_mem[i] = Pea[i]; } out.steal_mem(out2); } } else // general matrix transpose { if(is_alias == false) { out.set_size(n_cols, n_rows); eT* outptr = out.memptr(); for(uword k=0; k < n_rows; ++k) { uword j; for(j=1; j < n_cols; j+=2) { const uword i = j-1; const eT tmp_i = P.at(k,i); const eT tmp_j = P.at(k,j); (*outptr) = tmp_i; outptr++; (*outptr) = tmp_j; outptr++; } const uword i = j-1; if(i < n_cols) { (*outptr) = P.at(k,i); outptr++; } } } else // aliasing { Mat out2(n_cols, n_rows); eT* out2ptr = out2.memptr(); for(uword k=0; k < n_rows; ++k) { uword j; for(j=1; j < n_cols; j+=2) { const uword i = j-1; const eT tmp_i = P.at(k,i); const eT tmp_j = P.at(k,j); (*out2ptr) = tmp_i; out2ptr++; (*out2ptr) = tmp_j; out2ptr++; } const uword i = j-1; if(i < n_cols) { (*out2ptr) = P.at(k,i); out2ptr++; } } out.steal_mem(out2); } } } } template arma_hot inline void op_strans::apply(Mat& out, const Op& in) { arma_extra_debug_sigprint(); op_strans::apply_proxy(out, in.m); } // // op_strans2 //! for tiny square matrices (size <= 4x4) template arma_hot inline void op_strans2::apply_noalias_tinysq(Mat& out, const TA& A, const eT val) { const eT* Am = A.memptr(); eT* outm = out.memptr(); switch(A.n_rows) { case 1: { outm[0] = val * Am[0]; } break; case 2: { outm[pos::n2] = val * Am[pos::n2]; outm[pos::n2] = val * Am[pos::n2]; outm[pos::n2] = val * Am[pos::n2]; outm[pos::n2] = val * Am[pos::n2]; } break; case 3: { outm[pos::n3] = val * Am[pos::n3]; outm[pos::n3] = val * Am[pos::n3]; outm[pos::n3] = val * Am[pos::n3]; outm[pos::n3] = val * Am[pos::n3]; outm[pos::n3] = val * Am[pos::n3]; outm[pos::n3] = val * Am[pos::n3]; outm[pos::n3] = val * Am[pos::n3]; outm[pos::n3] = val * Am[pos::n3]; outm[pos::n3] = val * Am[pos::n3]; } break; case 4: { outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; outm[pos::n4] = val * Am[pos::n4]; } break; default: ; } } template arma_hot inline void op_strans2::apply_noalias(Mat& out, const TA& A, const eT val) { arma_extra_debug_sigprint(); const uword A_n_cols = A.n_cols; const uword A_n_rows = A.n_rows; out.set_size(A_n_cols, A_n_rows); if( (TA::is_col) || (TA::is_row) || (A_n_cols == 1) || (A_n_rows == 1) ) { const uword N = A.n_elem; const eT* A_mem = A.memptr(); eT* out_mem = out.memptr(); uword i,j; for(i=0, j=1; j < N; i+=2, j+=2) { const eT tmp_i = A_mem[i]; const eT tmp_j = A_mem[j]; out_mem[i] = val * tmp_i; out_mem[j] = val * tmp_j; } if(i < N) { out_mem[i] = val * A_mem[i]; } } else { if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) ) { op_strans2::apply_noalias_tinysq(out, A, val); } else { eT* outptr = out.memptr(); for(uword k=0; k < A_n_rows; ++k) { const eT* Aptr = &(A.at(k,0)); uword j; for(j=1; j < A_n_cols; j+=2) { const eT tmp_i = (*Aptr); Aptr += A_n_rows; const eT tmp_j = (*Aptr); Aptr += A_n_rows; (*outptr) = val * tmp_i; outptr++; (*outptr) = val * tmp_j; outptr++; } if((j-1) < A_n_cols) { (*outptr) = val * (*Aptr); outptr++;; } } } } } template arma_hot inline void op_strans2::apply(Mat& out, const TA& A, const eT val) { arma_extra_debug_sigprint(); if(&out != &A) { op_strans2::apply_noalias(out, A, val); } else { const uword n_rows = out.n_rows; const uword n_cols = out.n_cols; if(n_rows == n_cols) { arma_extra_debug_print("op_strans2::apply(): doing in-place transpose of a square matrix"); const uword N = n_rows; // TODO: do multiplication while swapping for(uword k=0; k < N; ++k) { eT* colptr = out.colptr(k); uword i,j; for(i=(k+1), j=(k+2); j < N; i+=2, j+=2) { std::swap(out.at(k,i), colptr[i]); std::swap(out.at(k,j), colptr[j]); } if(i < N) { std::swap(out.at(k,i), colptr[i]); } } arrayops::inplace_mul( out.memptr(), val, out.n_elem ); } else { Mat tmp; op_strans2::apply_noalias(tmp, A, val); out.steal_mem(tmp); } } } template arma_hot inline void op_strans2::apply_proxy(Mat& out, const T1& X, const typename T1::elem_type val) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; const Proxy P(X); // allow detection of in-place transpose if( (is_Mat::stored_type>::value == true) && (Proxy::fake_mat == false) ) { const unwrap::stored_type> tmp(P.Q); op_strans2::apply(out, tmp.M, val); } else { const uword n_rows = P.get_n_rows(); const uword n_cols = P.get_n_cols(); const bool is_alias = P.is_alias(out); if( (resolves_to_vector::value == true) && (Proxy::prefer_at_accessor == false) ) { if(is_alias == false) { out.set_size(n_cols, n_rows); eT* out_mem = out.memptr(); const uword n_elem = P.get_n_elem(); typename Proxy::ea_type Pea = P.get_ea(); uword i,j; for(i=0, j=1; j < n_elem; i+=2, j+=2) { const eT tmp_i = Pea[i]; const eT tmp_j = Pea[j]; out_mem[i] = val * tmp_i; out_mem[j] = val * tmp_j; } if(i < n_elem) { out_mem[i] = val * Pea[i]; } } else // aliasing { Mat out2(n_cols, n_rows); eT* out_mem = out2.memptr(); const uword n_elem = P.get_n_elem(); typename Proxy::ea_type Pea = P.get_ea(); uword i,j; for(i=0, j=1; j < n_elem; i+=2, j+=2) { const eT tmp_i = Pea[i]; const eT tmp_j = Pea[j]; out_mem[i] = val * tmp_i; out_mem[j] = val * tmp_j; } if(i < n_elem) { out_mem[i] = val * Pea[i]; } out.steal_mem(out2); } } else // general matrix transpose { if(is_alias == false) { out.set_size(n_cols, n_rows); eT* outptr = out.memptr(); for(uword k=0; k < n_rows; ++k) { uword j; for(j=1; j < n_cols; j+=2) { const uword i = j-1; const eT tmp_i = P.at(k,i); const eT tmp_j = P.at(k,j); (*outptr) = val * tmp_i; outptr++; (*outptr) = val * tmp_j; outptr++; } const uword i = j-1; if(i < n_cols) { (*outptr) = val * P.at(k,i); outptr++; } } } else // aliasing { Mat out2(n_cols, n_rows); eT* out2ptr = out2.memptr(); for(uword k=0; k < n_rows; ++k) { uword j; for(j=1; j < n_cols; j+=2) { const uword i = j-1; const eT tmp_i = P.at(k,i); const eT tmp_j = P.at(k,j); (*out2ptr) = val * tmp_i; out2ptr++; (*out2ptr) = val * tmp_j; out2ptr++; } const uword i = j-1; if(i < n_cols) { (*out2ptr) = val * P.at(k,i); out2ptr++; } } out.steal_mem(out2); } } } } //! @}