#include <iomanip>
#include <cassert>
#include "blas.h"

class Vector;
////////////////////////////////////////////////////////////////////////
//// This header contains simple implementation of class matrix     ////
////////////////////////////////////////////////////////////////////////
class Matrix{
  int N, Nd; // size of the matrix m[N,Nd]
  double *m; // pointer to data
public:
  ///// constructors & destructor
  Matrix();                               // default constructor is called when no arguments are given
  Matrix(int N, int Nd, double def_val=0);// constructor takes size as an argument
  Matrix(const Matrix& m);                // copy constructor necessary for returning by value
  ~Matrix();                              // destructor necessary for deallocation
  ///// other class members  
  double operator()(int i, int j) const;  // grouping operator can be overload to take many arguments
  double& operator()(int i, int j);       // note that operator access ([]) can not take more than one argument
  Matrix& operator= (const Matrix& m);    // to assign A=B
  int size0() const {return N;}           // first dimension
  int size1() const {return Nd;}          // and second dimension
  
  void resize(int N, int Nd);             // matrix can be resized at any point in code, the data is lost, however
  // Optimized call to blas routine to multiply matrix. Does not require any temporary objects and should be fast
  void Product(const Matrix& A, const Matrix& B, const std::string& transa, const std::string& transb, double alpha, double beta);
  void random();                          // fills the matrix with random numbers
  // friend function can access the private data of the class )
  friend int Eigenvalues(Matrix& H, Vector& Er, Vector& Ei);  
  friend Matrix operator* (const Matrix& A, const Matrix& B); // for C=A*B. Should be slover than function 'Product' because it requires temporary objects
  
  friend Vector operator* (const Matrix& A, const Vector& b); // for c=A*b matrix*vector
};

// Default constructor creates empty matrix
Matrix::Matrix() : N(0), Nd(0), m(NULL){};

// Given initial size and default value, metrix is initialized
Matrix::Matrix(int N_, int Nd_, double def_val) : N(N_), Nd(Nd_)
{
  m = new double[N*Nd];
  for (int i=0; i<N*Nd; i++) m[i] = def_val;
}

// Copy constructor needed for return by value
Matrix::Matrix(const Matrix& A) : N(0), Nd(0)
{
  resize(A.N, A.Nd);
  std::copy(A.m, A.m+N*Nd, m);
}

// Destructor deallocates the memory
Matrix::~Matrix()
{  delete[] m; }

// access operator. It uses assert macro which checks bounds
// in debug mode and ignores in release mode (-DNDEBUG)
inline double Matrix::operator()(int i, int j) const
{
  assert(i<N && j<Nd && i>=0 && j>=0);// Reading out of bounds
  return m[Nd*i+j];
}

// access operator for writing
inline double& Matrix::operator()(int i, int j)
{
  assert(i<N && j<Nd && i>=0 && j>=0);// Writting out of bounds
  return m[Nd*i+j];
}

// resizing matrix by destruction of data
void Matrix::resize(int N_, int Nd_)
{
  if (N_*Nd_ > N*Nd){     // need to deallocate
    N = N_; Nd = Nd_;     
    delete[] m;           // releases the old memory
    m = new double[N*Nd]; // and creates new peace of memory
  }else{
    N = N_;               // only changes size
    Nd = Nd_;             // no need to deallocate
  }
  // Note that better class would need to carry information about temporary size (N) and
  // actual size (let's say N0). The allocation/deallocation should occur only if
  // desired new size of vector is bigger than actual size N0 (not temporary size N).
}

// operator= is called in cases like A=B or A=B*C
Matrix& Matrix::operator= (const Matrix& A)
{
  resize(A.N, A.Nd);
  std::copy(A.m, A.m+N*Nd, m); // just copies whole chunk of data for performance reasons
}

// Fills matrix with random values
void Matrix::random()
{
  for (int i=0; i<size0(); i++)
    for (int j=0; j<size1(); j++)
      operator()(i,j) = drand48();
}

// Prints matrix in Python-like fashion
std::ostream& operator<<(std::ostream& stream, const Matrix& m)
{
  stream<<"[[";
  for (int j=0; j<m.size1(); j++){
      stream <<std::setw(10)<<m(0,j);
      stream << ((j<m.size1()-1) ? ", " : "], \\");
  }
  stream << std::endl;
  for (int i=1; i<m.size0()-1; i++){
    stream<<" [";
    for (int j=0; j<m.size1(); j++){
      stream<<std::setw(10)<<m(i,j);
      stream << ((j!=m.size1()-1) ? ", " : "], \\");
    }
    stream<<std::endl;
  }
  stream<<" [";
  for (int j=0; j<m.size1(); j++){
    stream<<std::setw(10)<<m(m.size0()-1,j);
    stream << ((j!=m.size1()-1) ? ", " : "]");
  }
  stream<<"]"<<std::endl;
  return stream; // Be careful! You have to return stream otherwise it will coredump!
}

// Implements matrix multiplication by calling lapack library
// Does not require any temporary object and should be very fast
inline void Matrix::Product(const Matrix& A, const Matrix& B, const std::string& transa="N", const std::string& transb="N", double alpha=1, double beta=0)
{
  if (transa!="N" && transa!="T" && transa!="C"){std::cerr<<"Did not recognize your task. Specify how to multiply matrices in dgemm!"<<std::endl; return;}
  
  if(transa=="N" && transb=="N"){
    if (A.Nd != B.N || !B.Nd || !A.N || !A.Nd || !B.N)
      std::cerr << " Matrix sizes not correct" << std::endl;
    resize(A.N, B.Nd);
    xgemm(transb, transa, B.Nd, A.N, B.N, alpha, B.m, B.Nd, A.m, A.Nd, beta, m, Nd);
    N = A.N; Nd = B.Nd;
  }else if((transa=="T"||transa=="C") && transb=="N"){
    if (A.N != B.N || !B.Nd || !A.Nd || !A.N || !B.N)
      std::cerr << " Matrix sizes not correct" << std::endl;
    resize(A.Nd, B.Nd);
    xgemm(transb, transa, B.Nd, A.Nd, B.N, alpha, B.m, B.Nd, A.m, A.Nd, beta, m, Nd);
    N = A.Nd; Nd = B.Nd;
  }else if (transa=="N" && (transb=="T"||transb=="C")){
    if (A.Nd != B.Nd || !B.N || !A.N || !A.Nd || !B.Nd)
      std::cerr << " Matrix sizes not correct" << std::endl;
    resize(A.N, B.N);
    xgemm(transb, transa, B.N, A.N, B.Nd, alpha, B.m, B.Nd, A.m, A.Nd, beta, m, Nd);
    N = A.N; Nd = B.N;
  }else if ((transa=="T"||transa=="C") && (transb=="T"||transb=="C")){
    if (A.N != B.Nd || !B.N || !A.N || !A.Nd || !B.Nd)
      std::cerr << " Matrix sizes not correct" << std::endl;
    resize(A.Nd, B.N);
    xgemm(transb, transa, B.N, A.Nd, B.Nd, alpha, B.m, B.Nd, A.m, A.Nd, beta, m, Nd);
    N = A.Nd; Nd = B.N;
  }
}

// friend operator A*B is slower because it needs few
// temporary objects
Matrix operator* (const Matrix& A, const Matrix& B)
{
  Matrix C; // temporary created
  C.Product(A, B);
  return C; // another temporary created to copy data
}


