diff --git a/.gitignore b/.gitignore index b8bd0267..d5f77595 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,9 @@ *.exe *.out *.app + +# Internal Makefile +Makefile + +# Build +build \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index d4ac2629..b4f4ec5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,10 @@ cmake_minimum_required(VERSION 3.12) # FindPython cmake_policy(VERSION 3.1) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") # Blitz version and release date. diff --git a/benchmarks/makeloops.cpp b/benchmarks/makeloops.cpp index cab22342..cc2b95cc 100644 --- a/benchmarks/makeloops.cpp +++ b/benchmarks/makeloops.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -20,7 +19,7 @@ class loop void reset(); - void parseLoop(istream& is); + void parseLoop(istream &is); int numArrays() const { @@ -42,12 +41,12 @@ class loop return scalars_[i]; } - const char* loopName() const + const char *loopName() const { return loopName_; } - const char* loopBuffer() const + const char *loopBuffer() const { return loopBuffer_; } @@ -59,7 +58,7 @@ class loop int isArray(char c) const { - for (int i=0; i < numArrays_; ++i) + for (int i = 0; i < numArrays_; ++i) if (arrays_[i] == c) return 1; return 0; @@ -67,14 +66,19 @@ class loop int isScalar(char c) const { - for (int i=0; i < numScalars_; ++i) + for (int i = 0; i < numScalars_; ++i) if (scalars_[i] == c) return 1; return 0; } private: - enum { maxArrays = 20, maxScalars = 20, bufLen = 128 }; + enum + { + maxArrays = 20, + maxScalars = 20, + bufLen = 128 + }; char arrays_[maxArrays]; int numArrays_; @@ -94,42 +98,54 @@ void loop::reset() flops_ = 0; } -void loop::parseLoop(istream& is) +void loop::parseLoop(istream &is) { const int bufLen = 128; char buffer[bufLen]; - const char* whitespace = " \t"; + const char *whitespace = " \t"; reset(); - while (!is.eof()) { + while (!is.eof()) + { is.getline(buffer, bufLen); - char* token = strtok(buffer, whitespace); + char *token = strtok(buffer, whitespace); if (!token) continue; - if (!strcmp(token, "begin")) { + if (!strcmp(token, "begin")) + { token = strtok(0, whitespace); strcpy(loopName_, token); cout << "Creating loop: " << loopName_ << endl; - } else if (!strcmp(token, "end")) + } + else if (!strcmp(token, "end")) return; - else if (!strcmp(token, "array")) { - while (token = strtok(0, whitespace)) { + else if (!strcmp(token, "array")) + { + while (token = strtok(0, whitespace)) + { arrays_[numArrays_++] = token[0]; cout << "Array: " << token[0] << endl; } - } else if (!strcmp(token, "scalar")) { - while (token = strtok(0, whitespace)) { + } + else if (!strcmp(token, "scalar")) + { + while (token = strtok(0, whitespace)) + { scalars_[numScalars_++] = token[0]; cout << "Scalar: " << token[0] << endl; } - } else if (!strcmp(token, "flops")) { + } + else if (!strcmp(token, "flops")) + { token = strtok(0, whitespace); flops_ = atoi(token); cout << "Flops: " << flops_ << endl; - } else if (!strcmp(token, "loop")) { + } + else if (!strcmp(token, "loop")) + { loopBuffer_[0] = '\0'; while (token = strtok(0, whitespace)) strcat(loopBuffer_, token); @@ -138,40 +154,44 @@ void loop::parseLoop(istream& is) } } -void fortranVersion(loop& lp) +void fortranVersion(loop &lp) { - const char* numtype = "REAL*8"; + const char *numtype = "REAL*8"; char filename[128]; sprintf(filename, "%sf.f", lp.loopName()); ofstream ofs(filename); ofs << " SUBROUTINE " << lp.loopName() << "_F77(N"; - for (int i=0; i < lp.numArrays(); ++i) + for (int i = 0; i < lp.numArrays(); ++i) ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) + for (int i = 0; i < lp.numScalars(); ++i) ofs << ", " << lp.scalarName(i); ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; + << " INTEGER i, N" << endl + << " " << numtype << " " << lp.arrayName(0) << "(N)"; - for (int i=1; i < lp.numArrays(); ++i) + for (int i = 1; i < lp.numArrays(); ++i) ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) + for (int i = 0; i < lp.numScalars(); ++i) ofs << ", " << lp.scalarName(i); - ofs << endl << endl - << " DO i=1,N" << endl - << " "; + ofs << endl + << endl + << " DO i=1,N" << endl + << " "; - const char* loopBuffer = lp.loopBuffer(); + const char *loopBuffer = lp.loopBuffer(); - for (int i=0; loopBuffer[i]; ++i) { + for (int i = 0; loopBuffer[i]; ++i) + { if (loopBuffer[i] == ';') - ofs << endl << " "; + ofs << endl + << " "; else if (loopBuffer[i] != '$') ofs << loopBuffer[i]; - else { + else + { ++i; if (lp.isArray(loopBuffer[i])) ofs << loopBuffer[i] << "(i)"; @@ -181,230 +201,263 @@ void fortranVersion(loop& lp) } ofs << endl - << " END DO" << endl; + << " END DO" << endl; ofs << " RETURN" << endl - << " END" << endl; + << " END" << endl; - ofs << endl << endl - << " SUBROUTINE " << lp.loopName() << "_F77Overhead(N"; + ofs << endl + << endl + << " SUBROUTINE " << lp.loopName() << "_F77Overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) + for (int i = 0; i < lp.numArrays(); ++i) ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) + for (int i = 0; i < lp.numScalars(); ++i) ofs << ", " << lp.scalarName(i); ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; + << " INTEGER i, N" << endl + << " " << numtype << " " << lp.arrayName(0) << "(N)"; - for (int i=1; i < lp.numArrays(); ++i) + for (int i = 1; i < lp.numArrays(); ++i) ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) + for (int i = 0; i < lp.numScalars(); ++i) ofs << ", " << lp.scalarName(i); ofs << endl - << " RETURN" << endl - << " END" << endl; + << " RETURN" << endl + << " END" << endl; } -void fortran90Version(loop& lp) +void fortran90Version(loop &lp) { - const char* numtype = "REAL*8"; + const char *numtype = "REAL*8"; char filename[128]; sprintf(filename, "%sf90.f90", lp.loopName()); ofstream ofs(filename); ofs << " SUBROUTINE " << lp.loopName() << "_F90(N"; - for (int i=0; i < lp.numArrays(); ++i) + for (int i = 0; i < lp.numArrays(); ++i) ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) + for (int i = 0; i < lp.numScalars(); ++i) ofs << ", " << lp.scalarName(i); ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; + << " INTEGER i, N" << endl + << " " << numtype << " " << lp.arrayName(0) << "(N)"; - for (int i=1; i < lp.numArrays(); ++i) + for (int i = 1; i < lp.numArrays(); ++i) ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) + for (int i = 0; i < lp.numScalars(); ++i) ofs << ", " << lp.scalarName(i); - ofs << endl << endl - << " "; + ofs << endl + << endl + << " "; - const char* loopBuffer = lp.loopBuffer(); + const char *loopBuffer = lp.loopBuffer(); - for (int i=0; loopBuffer[i]; ++i) { + for (int i = 0; loopBuffer[i]; ++i) + { if (loopBuffer[i] == ';') - ofs << endl << " "; + ofs << endl + << " "; else if (loopBuffer[i] != '$') ofs << loopBuffer[i]; } ofs << endl - << " RETURN" << endl - << " END" << endl; + << " RETURN" << endl + << " END" << endl; - ofs << endl << endl - << " SUBROUTINE " << lp.loopName() << "_F90Overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) + ofs << endl + << endl + << " SUBROUTINE " << lp.loopName() << "_F90Overhead(N"; + for (int i = 0; i < lp.numArrays(); ++i) ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) + for (int i = 0; i < lp.numScalars(); ++i) ofs << ", " << lp.scalarName(i); ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; + << " INTEGER i, N" << endl + << " " << numtype << " " << lp.arrayName(0) << "(N)"; - for (int i=1; i < lp.numArrays(); ++i) + for (int i = 1; i < lp.numArrays(); ++i) ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) + for (int i = 0; i < lp.numScalars(); ++i) ofs << ", " << lp.scalarName(i); - ofs << endl << endl - << " RETURN" << endl - << " END" << endl; + ofs << endl + << endl + << " RETURN" << endl + << " END" << endl; } -void writeFortranDecl(ofstream& ofs, const char* version, loop& lp, - const char* numtype); -void VectorVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void ArrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void ValarrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void F77Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void F90Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); - -void cppVersion(loop& lp) +void writeFortranDecl(ofstream &ofs, const char *version, loop &lp, + const char *numtype); +void VectorVersion(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2); +void ArrayVersion(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2); +void ValarrayVersion(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2); +void F77Version(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2); +void F90Version(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2); + +void cppVersion(loop &lp) { - const char* numtype = "double"; + const char *numtype = "double"; char filename[128]; sprintf(filename, "%s.cpp", lp.loopName()); ofstream ofs(filename); char capsLoopName[128]; - for (int i=0; i <= strlen(lp.loopName()); ++i) + for (int i = 0; i <= strlen(lp.loopName()); ++i) capsLoopName[i] = toupper(lp.loopName()[i]); - ofs << "// Generated code (makeloops.cpp) -- do not edit." << endl << endl - << "// In KAI C++ 3.2, restrict causes problems for copy propagation." - << endl << "// Temporary fix: disable restrict" << endl << endl - << "#define BZ_DISABLE_RESTRICT" << endl << endl - << - "#include \n" - "#include \n" - "#include \n" - "#include \n" - "\n" - "// Generated: " << __FILE__ << " " << __DATE__ << endl << endl << - "#ifdef BZ_HAVE_VALARRAY\n" - " #define BENCHMARK_VALARRAY\n" - "#endif\n\n" - "#ifdef BENCHMARK_VALARRAY\n" - "#include \n" - "#endif\n" - "\n" - "using namespace blitz;\n" - "\n" - "#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES)\n" - " #define " << lp.loopName() << "_f77 " << lp.loopName() << "_f77_\n" - " #define " << lp.loopName() << "_f77overhead " << lp.loopName() << "_f77overhead_\n" - - " #define " << lp.loopName() << "_f90 " << lp.loopName() << "_f90_\n" - " #define " << lp.loopName() << "_f90overhead " << lp.loopName() << "_f90overhead_\n" - - "#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES)\n" - " #define " << lp.loopName() << "_f77 " << lp.loopName() << "_f77__\n" - " #define " << lp.loopName() << "_f77overhead " << lp.loopName() << "_f77overhead__\n" - - " #define " << lp.loopName() << "_f90 " << lp.loopName() << "_f90__\n" - " #define " << lp.loopName() << "_f90overhead " << lp.loopName() << "_f90overhead__\n" - - "#elif defined(BZ_FORTRAN_SYMBOLS_CAPS)\n" - " #define " << lp.loopName() << "_f77 " << capsLoopName << "_F77\n" - " #define " << lp.loopName() << "_f77overhead " << capsLoopName << "_F77OVERHEAD\n" - " #define " << lp.loopName() << "_f90 " << capsLoopName << "_F90\n" - " #define " << lp.loopName() << "_f90overhead " << capsLoopName << "_F90OVERHEAD\n" - "#endif\n" - "\n" - "extern \"C\" {" << endl; + ofs << "// Generated code (makeloops.cpp) -- do not edit." << endl + << endl + << "// In KAI C++ 3.2, restrict causes problems for copy propagation." + << endl + << "// Temporary fix: disable restrict" << endl + << endl + << "#define BZ_DISABLE_RESTRICT" << endl + << endl + << "#include \n" + "#include \n" + "#include \n" + "#include \n" + "\n" + "// Generated: " + << __FILE__ << " " << __DATE__ << endl + << endl + << "#ifdef BZ_HAVE_VALARRAY\n" + " #define BENCHMARK_VALARRAY\n" + "#endif\n\n" + "#ifdef BENCHMARK_VALARRAY\n" + "#include \n" + "#endif\n" + "\n" + "using namespace blitz;\n" + "\n" + "#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES)\n" + " #define " + << lp.loopName() << "_f77 " << lp.loopName() << "_f77_\n" + " #define " + << lp.loopName() << "_f77overhead " << lp.loopName() << "_f77overhead_\n" + + " #define " + << lp.loopName() << "_f90 " << lp.loopName() << "_f90_\n" + " #define " + << lp.loopName() << "_f90overhead " << lp.loopName() << "_f90overhead_\n" + + "#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES)\n" + " #define " + << lp.loopName() << "_f77 " << lp.loopName() << "_f77__\n" + " #define " + << lp.loopName() << "_f77overhead " << lp.loopName() << "_f77overhead__\n" + + " #define " + << lp.loopName() << "_f90 " << lp.loopName() << "_f90__\n" + " #define " + << lp.loopName() << "_f90overhead " << lp.loopName() << "_f90overhead__\n" + + "#elif defined(BZ_FORTRAN_SYMBOLS_CAPS)\n" + " #define " + << lp.loopName() << "_f77 " << capsLoopName << "_F77\n" + " #define " + << lp.loopName() << "_f77overhead " << capsLoopName << "_F77OVERHEAD\n" + " #define " + << lp.loopName() << "_f90 " << capsLoopName << "_F90\n" + " #define " + << lp.loopName() << "_f90overhead " << capsLoopName << "_F90OVERHEAD\n" + "#endif\n" + "\n" + "extern \"C\" {" + << endl; writeFortranDecl(ofs, "_f77", lp, numtype); writeFortranDecl(ofs, "_f77overhead", lp, numtype); writeFortranDecl(ofs, "_f90", lp, numtype); writeFortranDecl(ofs, "_f90overhead", lp, numtype); - ofs << "}" << endl << endl; + ofs << "}" << endl + << endl; // Create a string with a list of arguments for the scalars ostrstream tmpbuf; - for (int i=0; i < lp.numScalars(); ++i) { + for (int i = 0; i < lp.numScalars(); ++i) + { tmpbuf << ", " << numtype << " " << lp.scalarName(i); } tmpbuf << '\0'; - const char* scalarArgs = tmpbuf.str(); + const char *scalarArgs = tmpbuf.str(); ofs << "void VectorVersion(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "void ArrayVersion(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "void F77Version(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "#ifdef FORTRAN_90" << endl - << "void F90Version(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "#endif" << endl - << "#ifdef BENCHMARK_VALARRAY" << endl - << "void ValarrayVersion(BenchmarkExt& bench" - << scalarArgs << ");" << endl << "#endif" << endl << endl; + << scalarArgs << ");" << endl + << "void ArrayVersion(BenchmarkExt& bench" + << scalarArgs << ");" << endl + << "void F77Version(BenchmarkExt& bench" + << scalarArgs << ");" << endl + << "#ifdef FORTRAN_90" << endl + << "void F90Version(BenchmarkExt& bench" + << scalarArgs << ");" << endl + << "#endif" << endl + << "#ifdef BENCHMARK_VALARRAY" << endl + << "void ValarrayVersion(BenchmarkExt& bench" + << scalarArgs << ");" << endl + << "#endif" << endl + << endl; ofs << "void sink() {}\n\n"; ofs << "int main()\n" - "{\n" - " int numBenchmarks = 5;\n" - "#ifndef BENCHMARK_VALARRAY\n" - " numBenchmarks--; // No valarray\n" - "#endif\n" - "#ifndef FORTRAN_90\n" - " numBenchmarks--; // No fortran 90\n" - "#endif\n" - - "\n" - " BenchmarkExt bench(\"" << lp.loopName() << ": " - << lp.loopBuffer() << "\", numBenchmarks);\n" - "\n" - " const int numSizes = 23;\n" - " bench.setNumParameters(numSizes);\n" - " bench.setRateDescription(\"Mflops/s\");\n" - "\n" - " Vector parameters(numSizes);\n" - " Vector iters(numSizes);\n" - " Vector flops(numSizes);\n" - "\n" - " for (int i=0; i < numSizes; ++i)\n" - " {\n" - " parameters[i] = (int)pow(10.0, (i+1)/4.0);\n" - " iters[i] = 10000000L / parameters[i];\n" - " if (iters[i] < 2)\n" - " iters[i] = 2;\n" - " flops[i] = " << lp.flops() << " * parameters[i];\n" - " }\n" - "\n" - " bench.setParameterVector(parameters);\n" - " bench.setIterations(iters);\n" - " bench.setOpsPerIteration(flops);\n" - "\n" - " bench.beginBenchmarking();" << endl << endl; + "{\n" + " int numBenchmarks = 5;\n" + "#ifndef BENCHMARK_VALARRAY\n" + " numBenchmarks--; // No valarray\n" + "#endif\n" + "#ifndef FORTRAN_90\n" + " numBenchmarks--; // No fortran 90\n" + "#endif\n" + + "\n" + " BenchmarkExt bench(\"" + << lp.loopName() << ": " + << lp.loopBuffer() << "\", numBenchmarks);\n" + "\n" + " const int numSizes = 23;\n" + " bench.setNumParameters(numSizes);\n" + " bench.setRateDescription(\"Mflops/s\");\n" + "\n" + " Vector parameters(numSizes);\n" + " Vector iters(numSizes);\n" + " Vector flops(numSizes);\n" + "\n" + " for (int i=0; i < numSizes; ++i)\n" + " {\n" + " parameters[i] = (int)pow(10.0, (i+1)/4.0);\n" + " iters[i] = 10000000L / parameters[i];\n" + " if (iters[i] < 2)\n" + " iters[i] = 2;\n" + " flops[i] = " + << lp.flops() << " * parameters[i];\n" + " }\n" + "\n" + " bench.setParameterVector(parameters);\n" + " bench.setIterations(iters);\n" + " bench.setOpsPerIteration(flops);\n" + "\n" + " bench.beginBenchmarking();" + << endl + << endl; // Create literals - for (int i=0; i < lp.numScalars(); ++i) { + for (int i = 0; i < lp.numScalars(); ++i) + { ofs << " " << numtype << " " << lp.scalarName(i) - << " = 0.39123982498157938742;" << endl; + << " = 0.39123982498157938742;" << endl; } ofs << endl; @@ -413,44 +466,47 @@ void cppVersion(loop& lp) // Create a string with a list of arguments for the scalars ostrstream tmpbuf2; - for (int i=0; i < lp.numScalars(); ++i) { + for (int i = 0; i < lp.numScalars(); ++i) + { tmpbuf2 << ", " << lp.scalarName(i); } tmpbuf2 << '\0'; - char* scalarArgs2 = tmpbuf2.str(); + char *scalarArgs2 = tmpbuf2.str(); ofs << " VectorVersion(bench" << scalarArgs2 << ");" << endl - << " ArrayVersion(bench" << scalarArgs2 << ");" << endl - << " F77Version(bench" << scalarArgs2 << ");" << endl - << "#ifdef FORTRAN_90" << endl - << " F90Version(bench" << scalarArgs2 << ");" << endl - << "#endif" << endl - << "#ifdef BENCHMARK_VALARRAY" << endl - << " ValarrayVersion(bench" << scalarArgs2 << ");" << endl - << "#endif" << endl << endl << - " bench.endBenchmarking();\n" - "\n" - " bench.saveMatlabGraph(\"" << lp.loopName() << ".m\");\n" - "\n" - " return 0;\n" - "}\n\n" - "template\n" - "void initializeRandomDouble(T data, int numElements, int stride = 1)\n" - "{\n" - " static Random rnd;\n" - "\n" - " for (int i=0; i < numElements; ++i)\n" - " data[size_t(i*stride)] = rnd.random();\n" - "}\n" - "\n" - "template\n" - "void initializeArray(T& array, int numElements)\n" - "{\n" - " static Random rnd;\n" - "\n" - " for (size_t i=0; i < numElements; ++i)\n" - " array[i] = rnd.random();\n" - "}\n\n"; + << " ArrayVersion(bench" << scalarArgs2 << ");" << endl + << " F77Version(bench" << scalarArgs2 << ");" << endl + << "#ifdef FORTRAN_90" << endl + << " F90Version(bench" << scalarArgs2 << ");" << endl + << "#endif" << endl + << "#ifdef BENCHMARK_VALARRAY" << endl + << " ValarrayVersion(bench" << scalarArgs2 << ");" << endl + << "#endif" << endl + << endl + << " bench.endBenchmarking();\n" + "\n" + " bench.saveMatlabGraph(\"" + << lp.loopName() << ".m\");\n" + "\n" + " return 0;\n" + "}\n\n" + "template\n" + "void initializeRandomDouble(T data, int numElements, int stride = 1)\n" + "{\n" + " static Random rnd;\n" + "\n" + " for (int i=0; i < numElements; ++i)\n" + " data[size_t(i*stride)] = rnd.random();\n" + "}\n" + "\n" + "template\n" + "void initializeArray(T& array, int numElements)\n" + "{\n" + " static Random rnd;\n" + "\n" + " for (size_t i=0; i < numElements; ++i)\n" + " array[i] = rnd.random();\n" + "}\n\n"; VectorVersion(ofs, lp, numtype, scalarArgs, scalarArgs2); ArrayVersion(ofs, lp, numtype, scalarArgs, scalarArgs2); @@ -459,299 +515,315 @@ void cppVersion(loop& lp) F90Version(ofs, lp, numtype, scalarArgs, scalarArgs2); } -void writeFortranDecl(ofstream& ofs, const char* version, loop& lp, - const char* numtype) +void writeFortranDecl(ofstream &ofs, const char *version, loop &lp, + const char *numtype) { ofs << " void " << lp.loopName() << version - << "(const int& N"; + << "(const int& N"; - for (int i=0; i < lp.numArrays(); ++i) + for (int i = 0; i < lp.numArrays(); ++i) ofs << ", " << numtype << "* " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) + for (int i = 0; i < lp.numScalars(); ++i) ofs << ", const " << numtype << "& " << lp.scalarName(i); - ofs << ");" << endl << endl; + ofs << ");" << endl + << endl; } -void VectorVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) +void VectorVersion(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2) { os << "void VectorVersion(BenchmarkExt& bench" - << scalarArgs << ")\n" - << - "{\n" - " bench.beginImplementation(\"Vector\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n" - " cout << \"Vector: N = \" << N << endl;\n" - " cout.flush();\n" - "\n" - " long iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { + << scalarArgs << ")\n" + << "{\n" + " bench.beginImplementation(\"Vector\");\n" + "\n" + " while (!bench.doneImplementationBenchmark())\n" + " {\n" + " int N = bench.getParameter();\n" + " cout << \"Vector: N = \" << N << endl;\n" + " cout.flush();\n" + "\n" + " long iters = bench.getIterations();\n" + "\n"; + + for (int i = 0; i < lp.numArrays(); ++i) + { os << " Vector<" << numtype << "> " << lp.arrayName(i) - << "(N);" << endl - << " initializeRandomDouble(" << lp.arrayName(i) << ".data(), N);" << endl; + << "(N);" << endl + << " initializeRandomDouble(" << lp.arrayName(i) << ".data(), N);" << endl; } - os << endl << - " bench.start();\n" - " for (long i=0; i < iters; ++i)\n" - " {\n" - " "; + os << endl + << " bench.start();\n" + " for (long i=0; i < iters; ++i)\n" + " {\n" + " "; - const char* loopBuffer = lp.loopBuffer(); + const char *loopBuffer = lp.loopBuffer(); - for (int i=0; loopBuffer[i]; ++i) { + for (int i = 0; loopBuffer[i]; ++i) + { if (loopBuffer[i] != '$') os << loopBuffer[i]; } - os << ";" << endl << - " sink();\n"; - - os << - " }\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (long i=0; i < iters; ++i)\n" - " sink();\n" - " bench.stopOverhead();\n" - - " }\n" - "\n" - " bench.endImplementation();\n" - "}" << endl << endl; + os << ";" << endl + << " sink();\n"; + + os << " }\n" + " bench.stop();\n\n" + " bench.startOverhead();\n" + " for (long i=0; i < iters; ++i)\n" + " sink();\n" + " bench.stopOverhead();\n" + + " }\n" + "\n" + " bench.endImplementation();\n" + "}" + << endl + << endl; } -void ArrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) +void ArrayVersion(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2) { os << "void ArrayVersion(BenchmarkExt& bench" - << scalarArgs << ")\n" - << - "{\n" - " bench.beginImplementation(\"Array\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n" - " cout << \"Array: N = \" << N << endl;\n" - " cout.flush();\n" - "\n" - " long iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { + << scalarArgs << ")\n" + << "{\n" + " bench.beginImplementation(\"Array\");\n" + "\n" + " while (!bench.doneImplementationBenchmark())\n" + " {\n" + " int N = bench.getParameter();\n" + " cout << \"Array: N = \" << N << endl;\n" + " cout.flush();\n" + "\n" + " long iters = bench.getIterations();\n" + "\n"; + + for (int i = 0; i < lp.numArrays(); ++i) + { os << " Array<" << numtype << ", 1> " << lp.arrayName(i) - << "(N);" << endl - << " initializeRandomDouble(" << lp.arrayName(i) << ".dataFirst(), N);" << endl; + << "(N);" << endl + << " initializeRandomDouble(" << lp.arrayName(i) << ".dataFirst(), N);" << endl; } - os << endl << - " bench.start();\n" - " for (long i=0; i < iters; ++i)\n" - " {\n" - " "; + os << endl + << " bench.start();\n" + " for (long i=0; i < iters; ++i)\n" + " {\n" + " "; - const char* loopBuffer = lp.loopBuffer(); + const char *loopBuffer = lp.loopBuffer(); - for (int i=0; loopBuffer[i]; ++i) { + for (int i = 0; loopBuffer[i]; ++i) + { if (loopBuffer[i] != '$') os << loopBuffer[i]; } - os << ";" << endl << - " sink();\n"; - - os << - " }\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (long i=0; i < iters; ++i)\n" - " sink();\n" - " bench.stopOverhead();\n"; - os << - " }\n" - "\n" - " bench.endImplementation();\n" - "}" << endl << endl; + os << ";" << endl + << " sink();\n"; + + os << " }\n" + " bench.stop();\n\n" + " bench.startOverhead();\n" + " for (long i=0; i < iters; ++i)\n" + " sink();\n" + " bench.stopOverhead();\n"; + os << " }\n" + "\n" + " bench.endImplementation();\n" + "}" + << endl + << endl; } -void F77Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) +void F77Version(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2) { os << "void F77Version(BenchmarkExt& bench" - << scalarArgs << ")\n" - "{\n" - " bench.beginImplementation(\"Fortran 77\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n\n" - " cout << \"Fortran 77: N = \" << N << endl;\n" - " cout.flush();\n\n" - " int iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { + << scalarArgs << ")\n" + "{\n" + " bench.beginImplementation(\"Fortran 77\");\n" + "\n" + " while (!bench.doneImplementationBenchmark())\n" + " {\n" + " int N = bench.getParameter();\n\n" + " cout << \"Fortran 77: N = \" << N << endl;\n" + " cout.flush();\n\n" + " int iters = bench.getIterations();\n" + "\n"; + + for (int i = 0; i < lp.numArrays(); ++i) + { os << " " << numtype << "* " << lp.arrayName(i) - << " = new " << numtype << "[N];" << endl - << " initializeRandomDouble(" << lp.arrayName(i) - << ", N);" << endl; + << " = new " << numtype << "[N];" << endl + << " initializeRandomDouble(" << lp.arrayName(i) + << ", N);" << endl; } - os << endl << - " bench.start();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f77(N"; + os << endl + << " bench.start();\n" + " for (int iter=0; iter < iters; ++iter)\n" + " " + << lp.loopName() << "_f77(N"; - for (int i=0; i < lp.numArrays(); ++i) + for (int i = 0; i < lp.numArrays(); ++i) os << ", " << lp.arrayName(i); os << scalarArgs2 << ");\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f77overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) + " bench.stop();\n\n" + " bench.startOverhead();\n" + " for (int iter=0; iter < iters; ++iter)\n" + " " + << lp.loopName() << "_f77overhead(N"; + for (int i = 0; i < lp.numArrays(); ++i) os << ", " << lp.arrayName(i); os << scalarArgs2 << ");\n"; - os << endl << - " bench.stopOverhead();\n"; + os << endl + << " bench.stopOverhead();\n"; - for (int i=0; i < lp.numArrays(); ++i) { + for (int i = 0; i < lp.numArrays(); ++i) + { os << " delete [] " << lp.arrayName(i) << ";" << endl; } os << " }\n" - "\n" - " bench.endImplementation();\n" - "}\n" << endl; - + "\n" + " bench.endImplementation();\n" + "}\n" + << endl; } -void F90Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) +void F90Version(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2) { os << "#ifdef FORTRAN_90" << endl - << "void F90Version(BenchmarkExt& bench" - << scalarArgs << ")\n" - "{\n" - " bench.beginImplementation(\"Fortran 90\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n\n" - " cout << \"Fortran 90: N = \" << N << endl;\n" - " cout.flush();\n\n" - " int iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { + << "void F90Version(BenchmarkExt& bench" + << scalarArgs << ")\n" + "{\n" + " bench.beginImplementation(\"Fortran 90\");\n" + "\n" + " while (!bench.doneImplementationBenchmark())\n" + " {\n" + " int N = bench.getParameter();\n\n" + " cout << \"Fortran 90: N = \" << N << endl;\n" + " cout.flush();\n\n" + " int iters = bench.getIterations();\n" + "\n"; + + for (int i = 0; i < lp.numArrays(); ++i) + { os << " " << numtype << "* " << lp.arrayName(i) - << " = new " << numtype << "[N];" << endl - << " initializeRandomDouble(" << lp.arrayName(i) - << ", N);" << endl; + << " = new " << numtype << "[N];" << endl + << " initializeRandomDouble(" << lp.arrayName(i) + << ", N);" << endl; } - os << endl << - " bench.start();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f90(N"; + os << endl + << " bench.start();\n" + " for (int iter=0; iter < iters; ++iter)\n" + " " + << lp.loopName() << "_f90(N"; - for (int i=0; i < lp.numArrays(); ++i) + for (int i = 0; i < lp.numArrays(); ++i) os << ", " << lp.arrayName(i); os << scalarArgs2 << ");\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f90overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) + " bench.stop();\n\n" + " bench.startOverhead();\n" + " for (int iter=0; iter < iters; ++iter)\n" + " " + << lp.loopName() << "_f90overhead(N"; + for (int i = 0; i < lp.numArrays(); ++i) os << ", " << lp.arrayName(i); os << scalarArgs2 << ");\n"; - os << endl << - " bench.stopOverhead();\n"; + os << endl + << " bench.stopOverhead();\n"; - for (int i=0; i < lp.numArrays(); ++i) { + for (int i = 0; i < lp.numArrays(); ++i) + { os << " delete [] " << lp.arrayName(i) << ";" << endl; } os << " }\n" - "\n" - " bench.endImplementation();\n" - "}\n" - << "#endif\n" << endl; - + "\n" + " bench.endImplementation();\n" + "}\n" + << "#endif\n" + << endl; } -void ValarrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) +void ValarrayVersion(ostream &os, loop &lp, const char *numtype, + const char *scalarArgs, const char *scalarArgs2) { os << "#ifdef BENCHMARK_VALARRAY" << endl; os << "void ValarrayVersion(BenchmarkExt& bench" - << scalarArgs << ")\n" - << - "{\n" - " bench.beginImplementation(\"valarray\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n" - " cout << \"valarray: N = \" << N << endl;\n" - " cout.flush();\n" - "\n" - " long iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { + << scalarArgs << ")\n" + << "{\n" + " bench.beginImplementation(\"valarray\");\n" + "\n" + " while (!bench.doneImplementationBenchmark())\n" + " {\n" + " int N = bench.getParameter();\n" + " cout << \"valarray: N = \" << N << endl;\n" + " cout.flush();\n" + "\n" + " long iters = bench.getIterations();\n" + "\n"; + + for (int i = 0; i < lp.numArrays(); ++i) + { os << " valarray<" << numtype << "> " << lp.arrayName(i) - << "(N);" << endl - << " initializeArray(" << lp.arrayName(i) << ", N);" << endl; + << "(N);" << endl + << " initializeArray(" << lp.arrayName(i) << ", N);" << endl; } - os << endl << - " bench.start();\n" - " for (long i=0; i < iters; ++i)\n" - " {\n" - " "; + os << endl + << " bench.start();\n" + " for (long i=0; i < iters; ++i)\n" + " {\n" + " "; - const char* loopBuffer = lp.loopBuffer(); + const char *loopBuffer = lp.loopBuffer(); - for (int i=0; loopBuffer[i]; ++i) { + for (int i = 0; loopBuffer[i]; ++i) + { if (loopBuffer[i] != '$') os << loopBuffer[i]; } - os << ";" << endl << - " sink();\n"; - - os << - " }\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (long i=0; i < iters; ++i)\n" - " sink();\n" - " bench.stopOverhead();\n" - - " }\n" - "\n" - " bench.endImplementation();\n" - "}" << endl << endl << "#endif" << endl; + os << ";" << endl + << " sink();\n"; + + os << " }\n" + " bench.stop();\n\n" + " bench.startOverhead();\n" + " for (long i=0; i < iters; ++i)\n" + " sink();\n" + " bench.stopOverhead();\n" + + " }\n" + "\n" + " bench.endImplementation();\n" + "}" + << endl + << endl + << "#endif" << endl; } int main() { ifstream ifs("loops.data"); - //ofstream ofs("makefile.inc"); + // ofstream ofs("makefile.inc"); loop lp; - while (!ifs.eof()) { + while (!ifs.eof()) + { lp.parseLoop(ifs); if (ifs.eof()) @@ -779,13 +851,10 @@ int main() */ fortranVersion(lp); -//#ifdef FORTRAN_90 + // #ifdef FORTRAN_90 fortran90Version(lp); -//#endif + // #endif cppVersion(lp); } return 0; } - - - diff --git a/blitz/array/fastiter.h b/blitz/array/fastiter.h index 4138a82b..fc6877ea 100644 --- a/blitz/array/fastiter.h +++ b/blitz/array/fastiter.h @@ -8,7 +8,7 @@ * * This file is a part of Blitz. * - * Blitz is free software: you can redistribute it and/or modify + * Blitz is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. @@ -18,11 +18,11 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public + * You should have received a copy of the GNU Lesser General Public * License along with Blitz. If not, see . - * + * * Suggestions: blitz-devel@lists.sourceforge.net - * Bugs: blitz-support@lists.sourceforge.net + * Bugs: blitz-support@lists.sourceforge.net * * For more information, please see the Blitz++ Home Page: * https://sourceforge.net/projects/blitz/ @@ -41,174 +41,199 @@ #include #ifdef BZ_HAVE_STD - #include +#include #else - #include +#include #endif -namespace blitz { - -// Wrapper to turn expressions with FAIs to FACIs so they can be -// returned from a function. -template -typename T::T_range_result safeToReturn(const T& expr) { - return expr(expr.domain()); -} - - -// forward declaration -template class FastArrayIterator; -template class FastArrayCopyIterator; - - -template -class FastArrayIteratorBase { -public: - typedef P_numtype T_numtype; - typedef typename opType::T_optype T_optype; - // if T_numtype is POD, then T_result is T_numtype, but if T_numtype - // is an ET class, T_result will be the array class for that class. - typedef typename asET::T_wrapped T_typeprop; - typedef typename unwrapET::T_unwrapped T_result; +namespace blitz +{ - /// Result type for fastRead_tv is a FastTVIterator. - typedef ETBase::vecWidth> > T_tvtypeprop; - typedef typename unwrapET::T_unwrapped T_tvresult; + // Wrapper to turn expressions with FAIs to FACIs so they can be + // returned from a function. + template + typename T::T_range_result safeToReturn(const T &expr) + { + return expr(expr.domain()); + } - typedef Array T_array; - typedef FastArrayIteratorBase T_iterator; - typedef const T_array& T_ctorArg1; - typedef int T_ctorArg2; // dummy - typedef FastArrayCopyIterator T_range_result; + // forward declaration + template + class FastArrayIterator; + template + class FastArrayCopyIterator; - static const int - numArrayOperands = 1, - numTVOperands = 0, + template + class FastArrayIteratorBase + { + public: + typedef P_numtype T_numtype; + typedef typename opType::T_optype T_optype; + // if T_numtype is POD, then T_result is T_numtype, but if T_numtype + // is an ET class, T_result will be the array class for that class. + typedef typename asET::T_wrapped T_typeprop; + typedef typename unwrapET::T_unwrapped T_result; + + /// Result type for fastRead_tv is a FastTVIterator. + typedef ETBase::vecWidth>> + T_tvtypeprop; + typedef typename unwrapET::T_unwrapped T_tvresult; + + typedef Array T_array; + typedef FastArrayIteratorBase T_iterator; + typedef const T_array &T_ctorArg1; + typedef int T_ctorArg2; // dummy + typedef FastArrayCopyIterator T_range_result; + + static const int + numArrayOperands = 1, + numTVOperands = 0, numTMOperands = 0, numIndexPlaceholders = 0, - minWidth = simdTypes::vecWidth, - maxWidth = simdTypes::vecWidth, + minWidth = simdTypes::vecWidth, + maxWidth = simdTypes::vecWidth, rank_ = N_rank; - /** For an iterator, the vectorized result for width N is always a - TinyVector. */ - template struct tvresult { - typedef FastTV2Iterator Type; - }; + /** For an iterator, the vectorized result for width N is always a + TinyVector. */ + template + struct tvresult + { + typedef FastTV2Iterator Type; + }; // NB: this ctor does NOT preserve stack and stride // parameters. This is for speed purposes. - FastArrayIteratorBase(const T_iterator& x) + FastArrayIteratorBase(const T_iterator &x) : data_(x.data_), array_(x.array_) - { } + { + } - void operator=(const T_iterator& x) + void operator=(const T_iterator &x) { // doesn't this copy the data in x.array_ and then make data_ // point to x's array? doesn't seem right - array_ = x.array_; - data_ = x.data_; - stack_ = x.stack_; - stride_ = x.stride_; + array_ = x.array_; + data_ = x.data_; + stack_ = x.stack_; + stride_ = x.stride_; } - FastArrayIteratorBase(const T_array& array) + FastArrayIteratorBase(const T_array &array) : array_(array) { - data_ = array_.data(); + data_ = array_.data(); } ~FastArrayIteratorBase() - { } + { + } #ifdef BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE T_result operator()(TinyVector i) const - { return array_(i); } + { + return array_(i); + } #else - T_result operator()(const TinyVector& i) const - { return array_(i); } + T_result operator()(const TinyVector &i) const + { + return array_(i); + } #endif int ascending(const int rank) const { - if (rank < N_rank) - return array_.isRankStoredAscending(rank); - else - return INT_MIN; // tiny(int()); + if (rank < N_rank) + return array_.isRankStoredAscending(rank); + else + return INT_MIN; // tiny(int()); } int ordering(const int rank) const { - if (rank < N_rank) - return array_.ordering(rank); - else - return INT_MIN; // tiny(int()); + if (rank < N_rank) + return array_.ordering(rank); + else + return INT_MIN; // tiny(int()); } int lbound(const int rank) const - { - if (rank < N_rank) - return array_.lbound(rank); - else - return INT_MIN; // tiny(int()); + { + if (rank < N_rank) + return array_.lbound(rank); + else + return INT_MIN; // tiny(int()); } int ubound(const int rank) const - { - if (rank < N_rank) - return array_.ubound(rank); - else - return INT_MAX; // huge(int()); + { + if (rank < N_rank) + return array_.ubound(rank); + else + return INT_MAX; // huge(int()); } - + RectDomain domain() const { return array_.domain(); }; T_result first_value() const { return *data_; } T_result operator*() const - { return *data_; } + { + return *data_; + } - template - T_range_result operator()(const RectDomain& d) const - { - return T_range_result(array_(d)); - } + template + T_range_result operator()(const RectDomain &d) const + { + return T_range_result(array_(d)); + } T_result operator[](int i) const - { return data_[i * stride_]; } + { + return data_[i * stride_]; + } T_result fastRead(diffType i) const - { return data_[i]; } - - /** Returns a TinyVector "view" of the data at i, with a vector - length specified by the template parameter N. This makes it - possible to convert a small part of an arbitrary expression into - a TinyVector expression, which is efficiently vectorized. */ - template - typename tvresult::Type fastRead_tv(diffType i) const - { - return typename tvresult::Type(*reinterpret_cast*>(&data_[i])); } + { + return data_[i]; + } - /** Returns true if the iterator data is aligned on a simd - vector. */ - bool isVectorAligned(diffType offset) const - { return simdTypes::isVectorAligned(data_ + offset); }; + /** Returns a TinyVector "view" of the data at i, with a vector + length specified by the template parameter N. This makes it + possible to convert a small part of an arbitrary expression into + a TinyVector expression, which is efficiently vectorized. */ + template + typename tvresult::Type fastRead_tv(diffType i) const + { + return typename tvresult::Type(*reinterpret_cast *>(&data_[i])); + } + + /** Returns true if the iterator data is aligned on a simd + vector. */ + bool isVectorAligned(diffType offset) const + { + return simdTypes::isVectorAligned(data_ + offset); + }; int suggestStride(int rank) const - { return array_.stride(rank); } + { + return array_.stride(rank); + } bool isStride(int rank, diffType stride) const - { return array_.stride(rank) == stride; } + { + return array_.stride(rank) == stride; + } void push(int position) { - stack_[position] = data_; + stack_[position] = data_; } - + void pop(int position) - { - data_ = stack_[position]; + { + data_ = stack_[position]; } void advance() @@ -223,266 +248,295 @@ class FastArrayIteratorBase { void loadStride(int rank) { - stride_ = array_.stride(rank); + stride_ = array_.stride(rank); } - // returns the lvalue, ie a pointer to the data - const T_numtype * restrict data() const - { return data_; } + // returns the lvalue, ie a pointer to the data + const T_numtype *restrict data() const + { + return data_; + } - const T_array& array() const - { return array_; } + const T_array &array() const + { + return array_; + } - void _bz_setData(const T_numtype* ptr) - { data_ = ptr; } + void _bz_setData(const T_numtype *ptr) + { + data_ = ptr; + } // this is needed for the stencil expression fastRead to work void _bz_offsetData(sizeType i) - { data_ += i;} + { + data_ += i; + } // and these are needed for stencil expression shift to work void _bz_offsetData(sizeType offset, int dim) - { data_ += offset*array_.stride(dim); } - + { + data_ += offset * array_.stride(dim); + } + void _bz_offsetData(sizeType offset1, int dim1, sizeType offset2, int dim2) - { data_ += offset1*array_.stride(dim1); - data_ += offset2*array_.stride(dim2); } + { + data_ += offset1 * array_.stride(dim1); + data_ += offset2 * array_.stride(dim2); + } int stride() const - { return stride_; } + { + return stride_; + } - /** Returns true if the Array has unit stride in the rank. */ + /** Returns true if the Array has unit stride in the rank. */ bool isUnitStride(int rank) const - { return array_.stride(rank) == 1; } + { + return array_.stride(rank) == 1; + } - /** Returns true if the loaded iterator stride is 1. */ + /** Returns true if the loaded iterator stride is 1. */ bool isUnitStride() const - { return stride() == 1; } + { + return stride() == 1; + } void advanceUnitStride() - { ++data_; } + { + ++data_; + } bool canCollapse(int outerLoopRank, int innerLoopRank) const - { return array_.canCollapse(outerLoopRank, innerLoopRank); } + { + return array_.canCollapse(outerLoopRank, innerLoopRank); + } - void prettyPrint(std::string &str, - prettyPrintFormat& format) const + void prettyPrint(std::string &str, + prettyPrintFormat &format) const { - if (format.tersePrintingSelected()) - str += format.nextArrayOperandSymbol(); - else if (format.dumpArrayShapesMode()) - { + if (format.tersePrintingSelected()) + str += format.nextArrayOperandSymbol(); + else if (format.dumpArrayShapesMode()) + { #ifdef BZ_HAVE_STD - std::ostringstream ostr; + std::ostringstream ostr; #else - ostrstream ostr; + ostrstream ostr; #endif - ostr << array_.shape(); - str += ostr.str(); - } - else { - str += "Array<"; - str += BZ_DEBUG_TEMPLATE_AS_STRING_LITERAL(T_numtype); - str += ","; - - char tmpBuf[10]; - sprintf(tmpBuf, "%d", N_rank); - - str += tmpBuf; - str += ">"; - } + ostr << array_.shape(); + str += ostr.str(); + } + else + { + str += "Array<"; + str += BZ_DEBUG_TEMPLATE_AS_STRING_LITERAL(T_numtype); + str += ","; + + char tmpBuf[TEMP_SIZE_BUFFER] = {0}; + snprintf(tmpBuf, TEMP_SIZE_BUFFER, "%d", N_rank); + + str += tmpBuf; + str += ">"; + } } - template - bool shapeCheck(const T_shape& shape) const - { return areShapesConformable(shape, array_.length()); } - + template + bool shapeCheck(const T_shape &shape) const + { + return areShapesConformable(shape, array_.length()); + } // Experimental - T_numtype& operator()(int i) const + T_numtype &operator()(int i) const { - return (T_numtype&)data_[i*array_.stride(0)]; + return (T_numtype &)data_[i * array_.stride(0)]; } // Experimental - T_numtype& operator()(int i, int j) const + T_numtype &operator()(int i, int j) const { - return (T_numtype&)data_[i*array_.stride(0) + j*array_.stride(1)]; + return (T_numtype &)data_[i * array_.stride(0) + j * array_.stride(1)]; } // Experimental - T_numtype& operator()(int i, int j, int k) const + T_numtype &operator()(int i, int j, int k) const { - return (T_numtype&)data_[i*array_.stride(0) - + j*array_.stride(1) - + k*array_.stride(2)]; + return (T_numtype &)data_[i * array_.stride(0) + j * array_.stride(1) + k * array_.stride(2)]; } // Experimental void moveTo(int i) { - data_ = &const_cast(array_)(i); + data_ = &const_cast(array_)(i); } void moveTo(int i, int j) { - data_ = &const_cast(array_)(i,j); + data_ = &const_cast(array_)(i, j); } void moveTo(int i, int j, int k) { - data_ = &const_cast(array_)(i,j,k); + data_ = &const_cast(array_)(i, j, k); } - template - void moveTo(const TinyVector& i) + template + void moveTo(const TinyVector &i) { - data_ = &const_cast(array_)(i); + data_ = &const_cast(array_)(i); } // Experimental void operator=(T_numtype x) - { *const_cast(data_) = x; } + { + *const_cast(data_) = x; + } // Experimental - template + template void operator=(T_value x) - { *const_cast(data_) = x; } + { + *const_cast(data_) = x; + } // Experimental - template + template void operator+=(T_value x) - { *const_cast(data_) += x; } + { + *const_cast(data_) += x; + } // NEEDS_WORK: other operators - + // Experimental operator T_numtype() const - { return *data_; } + { + return *data_; + } // Experimental T_result shift(int offset, int dim) const { - return data_[offset*array_.stride(dim)]; + return data_[offset * array_.stride(dim)]; } // Experimental T_result shift(int offset1, int dim1, int offset2, int dim2) const { - return data_[offset1*array_.stride(dim1) - + offset2*array_.stride(dim2)]; + return data_[offset1 * array_.stride(dim1) + offset2 * array_.stride(dim2)]; } - // sliceinfo for expressions - template - class SliceInfo { - public: - typedef FastArrayCopyIterator::rank> T_slice; + // sliceinfo for expressions + template + class SliceInfo + { + public: + typedef FastArrayCopyIterator::rank> T_slice; + }; + + template + typename SliceInfo::T_slice + operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const + { + return typename SliceInfo::T_slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); + } + + protected: + const P_numtype *restrict data_; + P_arraytype array_; + ConstPointerStack stack_; + diffType stride_; }; - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const + template + class FastArrayCopyIterator; + + template + class FastArrayIterator : public FastArrayIteratorBase &> { - return typename SliceInfo::T_slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); - } + public: + typedef FastArrayIteratorBase &> + T_base; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_array T_array; + typedef typename T_base::T_iterator T_iterator; + typedef typename T_base::T_ctorArg1 T_ctorArg1; + typedef typename T_base::T_ctorArg2 T_ctorArg2; + typedef typename T_base::T_range_result T_range_result; + + using T_base::numArrayOperands; + using T_base::numIndexPlaceholders; + using T_base::numTMOperands; + using T_base::numTVOperands; + using T_base::rank_; -protected: - const P_numtype * restrict data_; - P_arraytype array_; - ConstPointerStack stack_; - diffType stride_; -}; + // NB: this ctor does NOT preserve stack and stride + // parameters. This is for speed purposes. + FastArrayIterator(const FastArrayIterator &x) + : T_base(x) + { + } + FastArrayIterator(const T_array &array) : T_base(array) {} -template class FastArrayCopyIterator; + using T_base::operator=; + void operator=(const FastArrayIterator &x) + { + T_base::operator=(x); + } -template -class FastArrayIterator : - public FastArrayIteratorBase&> -{ -public: - typedef FastArrayIteratorBase&> T_base; - typedef typename T_base::T_numtype T_numtype; - typedef typename T_base::T_array T_array; - typedef typename T_base::T_iterator T_iterator; - typedef typename T_base::T_ctorArg1 T_ctorArg1; - typedef typename T_base::T_ctorArg2 T_ctorArg2; - typedef typename T_base::T_range_result T_range_result; - - using T_base::rank_; - using T_base::numArrayOperands; - using T_base::numTVOperands; - using T_base::numTMOperands; - using T_base::numIndexPlaceholders; - - // NB: this ctor does NOT preserve stack and stride - // parameters. This is for speed purposes. - FastArrayIterator(const FastArrayIterator& x) - : T_base(x) - { } - - FastArrayIterator(const T_array& array) : T_base(array) {} - - using T_base::operator=; - void operator=(const FastArrayIterator& x) - { - T_base::operator=(x); - } - - using T_base::operator(); -}; - -/* This version of the FastArrayIterator makes a COPY of the array - it's pointing to. This makes it possible to return expressions of - arrays that have gone out of scope, or to slice expressions. */ -template -class FastArrayCopyIterator : - public FastArrayIteratorBase > -{ -public: - typedef FastArrayIteratorBase > T_base; - typedef typename T_base::T_numtype T_numtype; - typedef typename T_base::T_array T_array; - typedef typename T_base::T_iterator T_iterator; - typedef typename T_base::T_ctorArg1 T_ctorArg1; - typedef typename T_base::T_ctorArg2 T_ctorArg2; - typedef typename T_base::T_range_result T_range_result; - - using T_base::rank_; - using T_base::numArrayOperands; - using T_base::numTVOperands; - using T_base::numTMOperands; - using T_base::numIndexPlaceholders; - - - // NB: this ctor does NOT preserve stack and stride - // parameters. This is for speed purposes. - FastArrayCopyIterator(const FastArrayCopyIterator& x) - : T_base(x) - { } - - FastArrayCopyIterator(const T_array& array) : T_base(array) { } - - using T_base::operator=; - void operator=(const FastArrayCopyIterator& x) + using T_base::operator(); + }; + + /* This version of the FastArrayIterator makes a COPY of the array + it's pointing to. This makes it possible to return expressions of + arrays that have gone out of scope, or to slice expressions. */ + template + class FastArrayCopyIterator : public FastArrayIteratorBase> { - T_base::operator=(x); - } - - using T_base::operator(); + public: + typedef FastArrayIteratorBase> + T_base; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_array T_array; + typedef typename T_base::T_iterator T_iterator; + typedef typename T_base::T_ctorArg1 T_ctorArg1; + typedef typename T_base::T_ctorArg2 T_ctorArg2; + typedef typename T_base::T_range_result T_range_result; + + using T_base::numArrayOperands; + using T_base::numIndexPlaceholders; + using T_base::numTMOperands; + using T_base::numTVOperands; + using T_base::rank_; -}; + // NB: this ctor does NOT preserve stack and stride + // parameters. This is for speed purposes. + FastArrayCopyIterator(const FastArrayCopyIterator &x) + : T_base(x) + { + } + + FastArrayCopyIterator(const T_array &array) : T_base(array) {} + + using T_base::operator=; + void operator=(const FastArrayCopyIterator &x) + { + T_base::operator=(x); + } + using T_base::operator(); + }; } diff --git a/blitz/prettyprint.h b/blitz/prettyprint.h index 6b1f6476..45b35b9a 100644 --- a/blitz/prettyprint.h +++ b/blitz/prettyprint.h @@ -9,7 +9,7 @@ * * This file is a part of Blitz. * - * Blitz is free software: you can redistribute it and/or modify + * Blitz is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. @@ -19,11 +19,11 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public + * You should have received a copy of the GNU Lesser General Public * License along with Blitz. If not, see . - * + * * Suggestions: blitz-devel@lists.sourceforge.net - * Bugs: blitz-support@lists.sourceforge.net + * Bugs: blitz-support@lists.sourceforge.net * * For more information, please see the Blitz++ Home Page: * https://sourceforge.net/projects/blitz/ @@ -35,38 +35,42 @@ #include -namespace blitz { +#define TEMP_SIZE_BUFFER 10 -class prettyPrintFormat { +namespace blitz +{ -public: - prettyPrintFormat(const bool terse = false) - : tersePrintingSelected_(terse) + class prettyPrintFormat { - arrayOperandCounter_ = 0; - scalarOperandCounter_ = 0; - dumpArrayShapes_ = false; - } - void setDumpArrayShapesMode() { dumpArrayShapes_ = true; } - char nextArrayOperandSymbol() - { - return static_cast('A' + ((arrayOperandCounter_++) % 26)); - } - char nextScalarOperandSymbol() - { - return static_cast('s' + ((scalarOperandCounter_++) % 26)); - } + public: + prettyPrintFormat(const bool terse = false) + : tersePrintingSelected_(terse) + { + arrayOperandCounter_ = 0; + scalarOperandCounter_ = 0; + dumpArrayShapes_ = false; + } + + void setDumpArrayShapesMode() { dumpArrayShapes_ = true; } + char nextArrayOperandSymbol() + { + return static_cast('A' + ((arrayOperandCounter_++) % 26)); + } + char nextScalarOperandSymbol() + { + return static_cast('s' + ((scalarOperandCounter_++) % 26)); + } - bool tersePrintingSelected() const { return tersePrintingSelected_; } - bool dumpArrayShapesMode() const { return dumpArrayShapes_; } + bool tersePrintingSelected() const { return tersePrintingSelected_; } + bool dumpArrayShapesMode() const { return dumpArrayShapes_; } -private: - bool tersePrintingSelected_; - bool dumpArrayShapes_; - int arrayOperandCounter_; - int scalarOperandCounter_; -}; + private: + bool tersePrintingSelected_; + bool dumpArrayShapes_; + int arrayOperandCounter_; + int scalarOperandCounter_; + }; } diff --git a/blitz/tm2fastiter.h b/blitz/tm2fastiter.h index a2c411f2..7317f9bd 100644 --- a/blitz/tm2fastiter.h +++ b/blitz/tm2fastiter.h @@ -8,7 +8,7 @@ * * This file is a part of Blitz. * - * Blitz is free software: you can redistribute it and/or modify + * Blitz is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. @@ -18,11 +18,11 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public + * You should have received a copy of the GNU Lesser General Public * License along with Blitz. If not, see . - * + * * Suggestions: blitz-devel@lists.sourceforge.net - * Bugs: blitz-support@lists.sourceforge.net + * Bugs: blitz-support@lists.sourceforge.net * * For more information, please see the Blitz++ Home Page: * https://sourceforge.net/projects/blitz/ @@ -32,9 +32,9 @@ #define BZ_ARRAY_TM2FASTITER_H #ifdef BZ_HAVE_STD - #include +#include #else - #include +#include #endif #include @@ -42,148 +42,169 @@ #include #include -namespace blitz { - -// forward declaration -template class FastTM2Iterator; -template class FastTM2CopyIterator; - - -template -class FastTM2IteratorBase { -public: - typedef P_numtype T_numtype; - typedef typename opType::T_optype T_optype; - // if T_numtype is POD, then T_result is T_numtype, but if T_numtype - // is an ET class, T_result will be the array class for that class. - typedef typename asET::T_wrapped T_typeprop; - typedef typename unwrapET::T_unwrapped T_result; - - /** Result type for fastRead_tv is a FastTVIterator. This should - only be used for mixed TM/Array expressions. */ - typedef ETBase::vecWidth> > T_tvtypeprop; - typedef typename unwrapET::T_unwrapped T_tvresult; - - typedef TinyMatrix T_matrix; - typedef FastTM2IteratorBase T_iterator; - typedef const T_matrix& T_ctorArg1; - typedef int T_ctorArg2; // dummy - typedef FastTM2CopyIterator T_range_result; - - static const int - numArrayOperands = 0, - numTVOperands = 0, - numTMOperands = 1, +namespace blitz +{ + + // forward declaration + template + class FastTM2Iterator; + template + class FastTM2CopyIterator; + + template + class FastTM2IteratorBase + { + public: + typedef P_numtype T_numtype; + typedef typename opType::T_optype T_optype; + // if T_numtype is POD, then T_result is T_numtype, but if T_numtype + // is an ET class, T_result will be the array class for that class. + typedef typename asET::T_wrapped T_typeprop; + typedef typename unwrapET::T_unwrapped T_result; + + /** Result type for fastRead_tv is a FastTVIterator. This should + only be used for mixed TM/Array expressions. */ + typedef ETBase::vecWidth>> + T_tvtypeprop; + typedef typename unwrapET::T_unwrapped T_tvresult; + + typedef TinyMatrix T_matrix; + typedef FastTM2IteratorBase T_iterator; + typedef const T_matrix &T_ctorArg1; + typedef int T_ctorArg2; // dummy + typedef FastTM2CopyIterator T_range_result; + + static const int + numArrayOperands = 0, + numTVOperands = 0, + numTMOperands = 1, numIndexPlaceholders = 0, - minWidth = simdTypes::vecWidth, - maxWidth = simdTypes::vecWidth, + minWidth = simdTypes::vecWidth, + maxWidth = simdTypes::vecWidth, rank_ = 2; - /** For an iterator, the vectorized result for width N is always a - TinyVector. */ - template struct tvresult { - typedef FastTV2Iterator Type; - }; + /** For an iterator, the vectorized result for width N is always a + TinyVector. */ + template + struct tvresult + { + typedef FastTV2Iterator Type; + }; - FastTM2IteratorBase(const T_iterator& x) + FastTM2IteratorBase(const T_iterator &x) : data_(x.data_), array_(x.array_) - { } + { + } - void operator=(const T_iterator& x) + void operator=(const T_iterator &x) { BZPRECONDITION(0); // doesn't this copy the data in x.array_ and then make data_ // point to x's array? doesn't seem right - array_ = x.array_; - data_ = x.data_; - stack_ = x.stack_; - //stride_ = x.stride_; + array_ = x.array_; + data_ = x.data_; + stack_ = x.stack_; + // stride_ = x.stride_; } - FastTM2IteratorBase(const T_matrix& array) + FastTM2IteratorBase(const T_matrix &array) : array_(array) { - data_ = array_.data(); + data_ = array_.data(); } ~FastTM2IteratorBase() - { } + { + } - T_result operator()(TinyVector i) const + T_result operator()(TinyVector i) const { - return array_(i); + return array_(i); } - static int ascending(const int r) + static int ascending(const int r) { - if (r domain() const { return T_matrix::domain(); }; + + // RectDomain domain() const { return T_matrix::domain(); }; T_result first_value() const { return *data_; } T_result operator*() const - { return *data_; } + { + return *data_; + } T_result operator[](int i) const - { return data_[i * stride_]; } + { + return data_[i * stride_]; + } T_result fastRead(diffType i) const - { return array_.fastRead(i); } + { + return array_.fastRead(i); + } + + template + typename tvresult::Type fastRead_tv(diffType i) const + { + return typename tvresult::Type(*reinterpret_cast *>(&data_[i])); + } - template - typename tvresult::Type fastRead_tv(diffType i) const - { - return typename tvresult::Type(*reinterpret_cast*>(&data_[i])); } + /** Since data_ is simd aligned by construction, we just have + to check the offest. */ + bool isVectorAligned(diffType offset) const + { + return (offset % simdTypes::vecWidth == 0) ? true : false; + } - /** Since data_ is simd aligned by construction, we just have - to check the offest. */ - bool isVectorAligned(diffType offset) const - { return (offset%simdTypes::vecWidth==0) ? true : false; } + static int suggestStride(int r) + { + return T_matrix::stride(r); + } - static int suggestStride(int r) - { return T_matrix::stride(r); } - - static bool isStride(int r, diffType stride) - { return T_matrix::stride(r) == stride; } + static bool isStride(int r, diffType stride) + { + return T_matrix::stride(r) == stride; + } void push(int position) { - stack_[position] = data_; + stack_[position] = data_; } - + void pop(int position) - { - data_ = stack_[position]; + { + data_ = stack_[position]; } void advance() @@ -198,283 +219,305 @@ class FastTM2IteratorBase { void loadStride(int rank) { - stride_ = T_matrix::stride(rank); + stride_ = T_matrix::stride(rank); } - // This is used as lvalue, so it should return the actual data - const T_numtype * restrict data() const - { return data_; } + // This is used as lvalue, so it should return the actual data + const T_numtype *restrict data() const + { + return data_; + } - const T_matrix& array() const - {return array_; } + const T_matrix &array() const + { + return array_; + } - void _bz_setData(const T_numtype* ptr) - { data_ = ptr; } + void _bz_setData(const T_numtype *ptr) + { + data_ = ptr; + } // this is needed for the stencil expression fastRead to work void _bz_offsetData(sizeType i) - { data_ += i;} + { + data_ += i; + } // and these are needed for stencil expression shift to work void _bz_offsetData(sizeType offset, int dim) - { data_ += offset*T_matrix::stride(dim); } - + { + data_ += offset * T_matrix::stride(dim); + } + void _bz_offsetData(sizeType offset1, int dim1, sizeType offset2, int dim2) - { data_ += offset1*T_matrix::stride(dim1); - data_ += offset2*T_matrix::stride(dim2); } + { + data_ += offset1 * T_matrix::stride(dim1); + data_ += offset2 * T_matrix::stride(dim2); + } int stride() const - { return stride_; } + { + return stride_; + } - static bool isUnitStride(int r) - { return T_matrix::stride(r) == 1; } + static bool isUnitStride(int r) + { + return T_matrix::stride(r) == 1; + } - bool isUnitStride() const - { return stride() == 1; } + bool isUnitStride() const + { + return stride() == 1; + } void advanceUnitStride() - { ++data_; } - - bool canCollapse(int outerLoopRank, int innerLoopRank) const - { return T_matrix::canCollapse(outerLoopRank, innerLoopRank); } + { + ++data_; + } - template - bool shapeCheck(const T_shape& s) const - { return areShapesConformable(s, T_matrix::length()); } + bool canCollapse(int outerLoopRank, int innerLoopRank) const + { + return T_matrix::canCollapse(outerLoopRank, innerLoopRank); + } - /* - // Experimental - T_numtype& operator()(int i) const + template + bool shapeCheck(const T_shape &s) const { - return (T_numtype&)data_[i*T_matrix::stride(0)]; + return areShapesConformable(s, T_matrix::length()); } + /* + // Experimental + T_numtype& operator()(int i) const + { + return (T_numtype&)data_[i*T_matrix::stride(0)]; + } + + // Experimental + T_numtype& operator()(int i, int j) const + { + return (T_numtype&)data_[i*T_matrix::stride(0) + j*T_matrix::stride(1)]; + } + + // Experimental + + void moveTo(int i) + { + data_ = &const_cast(array_)(i); + } + + void moveTo(int i, int j) + { + data_ = &const_cast(array_)(i,j); + } + + template + void moveTo(const TinyVector& i) + { + data_ = &const_cast(array_)(i); + } + + // Experimental + void operator=(T_numtype x) + { *const_cast(data_) = x; } + + // Experimental + template + void operator=(T_value x) + { *const_cast(data_) = x; } + + // Experimental + template + void operator+=(T_value x) + { *const_cast(data_) += x; } + + // NEEDS_WORK: other operators + + // Experimental + operator T_numtype() const + { return *data_; } + */ + // Experimental - T_numtype& operator()(int i, int j) const + T_result shift(int offset, int dim) const { - return (T_numtype&)data_[i*T_matrix::stride(0) + j*T_matrix::stride(1)]; + return data_[offset * T_matrix::stride(dim)]; } // Experimental + T_result shift(int offset1, int dim1, int offset2, int dim2) const + { + return data_[offset1 * T_matrix::stride(dim1) + offset2 * T_matrix::stride(dim2)]; + } - void moveTo(int i) + void prettyPrint(std::string &str, + prettyPrintFormat &format) const { - data_ = &const_cast(array_)(i); + if (format.tersePrintingSelected()) + str += format.nextArrayOperandSymbol(); + else if (format.dumpArrayShapesMode()) + { +#ifdef BZ_HAVE_STD + std::ostringstream ostr; +#else + ostrstream ostr; +#endif + ostr << T_matrix::shape(); + str += ostr.str(); + } + else + { + str += "TinyMatrix<"; + str += BZ_DEBUG_TEMPLATE_AS_STRING_LITERAL(T_numtype); + str += ","; + + char tmpBuf[TEMP_SIZE_BUFFER] = {0}; + snprintf(tmpBuf, TEMP_SIZE_BUFFER, "%d", N_rows); + + str += tmpBuf; + str += ","; + snprintf(tmpBuf, TEMP_SIZE_BUFFER, "%d", N_columns); + + str += tmpBuf; + str += ">"; + } } - void moveTo(int i, int j) + // tiny matrices can't be sliced + template + class SliceInfo + { + public: + typedef void T_slice; + }; + + protected: + const T_numtype *restrict data_; + P_arraytype array_; + ConstPointerStack stack_; + diffType stride_; + }; + + template + class FastTM2CopyIterator; + + template + class FastTM2Iterator : public FastTM2IteratorBase &> + { + public: + typedef FastTM2IteratorBase &> + T_base; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_matrix T_matrix; + typedef typename T_base::T_iterator T_iterator; + typedef typename T_base::T_ctorArg1 T_ctorArg1; + typedef typename T_base::T_ctorArg2 T_ctorArg2; + typedef typename T_base::T_range_result T_range_result; + + using T_base::numArrayOperands; + using T_base::numIndexPlaceholders; + using T_base::numTMOperands; + using T_base::numTVOperands; + using T_base::rank_; + + // NB: this ctor does NOT preserve stack and stride + // parameters. This is for speed purposes. + FastTM2Iterator(const FastTM2Iterator &x) + : T_base(x) { - data_ = &const_cast(array_)(i,j); } - template - void moveTo(const TinyVector& i) + FastTM2Iterator(const T_matrix &array) : T_base(array) {} + + using T_base::operator=; + void operator=(const FastTM2Iterator &x) { - data_ = &const_cast(array_)(i); + T_base::operator=(x); } - // Experimental - void operator=(T_numtype x) - { *const_cast(data_) = x; } + using T_base::operator(); - // Experimental - template - void operator=(T_value x) - { *const_cast(data_) = x; } + // template + // T_range_result operator()(const RectDomain& d) const + // { + // return T_range_result(T_base::array_(d)); + // } - // Experimental - template - void operator+=(T_value x) - { *const_cast(data_) += x; } + // template + // FastTM2CopyIterator::T_slice::rank> + // operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const + // { + // typedef FastTM2CopyIterator::T_slice::rank> slice; - // NEEDS_WORK: other operators - - // Experimental - operator T_numtype() const - { return *data_; } - */ + // return slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); + // } + }; - // Experimental - T_result shift(int offset, int dim) const + /* This version of the FastTM2Iterator makes a COPY of the array + it's pointing to. This makes it possible to return expressions of + arrays that have gone out of scope, or to slice expressions. */ + template + class FastTM2CopyIterator : public FastTM2IteratorBase> + { + public: + typedef FastTM2IteratorBase> + T_base; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_matrix T_matrix; + typedef typename T_base::T_iterator T_iterator; + typedef typename T_base::T_ctorArg1 T_ctorArg1; + typedef typename T_base::T_ctorArg2 T_ctorArg2; + typedef typename T_base::T_range_result T_range_result; + + using T_base::numArrayOperands; + using T_base::numIndexPlaceholders; + using T_base::numTMOperands; + using T_base::numTVOperands; + using T_base::rank_; + + // NB: this ctor does NOT preserve stack and stride + // parameters. This is for speed purposes. + FastTM2CopyIterator(const FastTM2CopyIterator &x) + : T_base(x) { - return data_[offset*T_matrix::stride(dim)]; } - // Experimental - T_result shift(int offset1, int dim1, int offset2, int dim2) const + FastTM2CopyIterator(const T_matrix &array) : T_base(array) {} + + using T_base::operator=; + void operator=(const FastTM2CopyIterator &x) { - return data_[offset1*T_matrix::stride(dim1) - + offset2*T_matrix::stride(dim2)]; + T_base::operator=(x); } - void prettyPrint(std::string &str, - prettyPrintFormat& format) const - { - if (format.tersePrintingSelected()) - str += format.nextArrayOperandSymbol(); - else if (format.dumpArrayShapesMode()) - { -#ifdef BZ_HAVE_STD - std::ostringstream ostr; -#else - ostrstream ostr; -#endif - ostr << T_matrix::shape(); - str += ostr.str(); - } - else { - str += "TinyMatrix<"; - str += BZ_DEBUG_TEMPLATE_AS_STRING_LITERAL(T_numtype); - str += ","; - - char tmpBuf[10]; - sprintf(tmpBuf, "%d", N_rows); - - str += tmpBuf; - str += ","; - sprintf(tmpBuf, "%d", N_columns); - - str += tmpBuf; - str += ">"; - } - } - - // tiny matrices can't be sliced - template - class SliceInfo { - public: - typedef void T_slice; -}; - -protected: - const T_numtype * restrict data_; - P_arraytype array_; - ConstPointerStack stack_; - diffType stride_; -}; - - -template -class FastTM2CopyIterator; - - -template -class FastTM2Iterator : - public FastTM2IteratorBase&> -{ -public: - typedef FastTM2IteratorBase&> T_base; - typedef typename T_base::T_numtype T_numtype; - typedef typename T_base::T_matrix T_matrix; - typedef typename T_base::T_iterator T_iterator; - typedef typename T_base::T_ctorArg1 T_ctorArg1; - typedef typename T_base::T_ctorArg2 T_ctorArg2; - typedef typename T_base::T_range_result T_range_result; - - using T_base::rank_; - using T_base::numArrayOperands; - using T_base::numTVOperands; - using T_base::numTMOperands; - using T_base::numIndexPlaceholders; - - // NB: this ctor does NOT preserve stack and stride - // parameters. This is for speed purposes. - FastTM2Iterator(const FastTM2Iterator& x) - : T_base(x) - { } - - FastTM2Iterator(const T_matrix& array) : T_base(array) {} - - using T_base::operator=; - void operator=(const FastTM2Iterator& x) - { - T_base::operator=(x); - } - - using T_base::operator(); - - // template - // T_range_result operator()(const RectDomain& d) const - // { - // return T_range_result(T_base::array_(d)); - // } - - // template - // FastTM2CopyIterator::T_slice::rank> - // operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const - // { - // typedef FastTM2CopyIterator::T_slice::rank> slice; - - // return slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); - // } - -}; - -/* This version of the FastTM2Iterator makes a COPY of the array - it's pointing to. This makes it possible to return expressions of - arrays that have gone out of scope, or to slice expressions. */ -template -class FastTM2CopyIterator : - public FastTM2IteratorBase > -{ -public: - typedef FastTM2IteratorBase > T_base; - typedef typename T_base::T_numtype T_numtype; - typedef typename T_base::T_matrix T_matrix; - typedef typename T_base::T_iterator T_iterator; - typedef typename T_base::T_ctorArg1 T_ctorArg1; - typedef typename T_base::T_ctorArg2 T_ctorArg2; - typedef typename T_base::T_range_result T_range_result; - - using T_base::rank_; - using T_base::numArrayOperands; - using T_base::numTVOperands; - using T_base::numTMOperands; - using T_base::numIndexPlaceholders; - - - // NB: this ctor does NOT preserve stack and stride - // parameters. This is for speed purposes. - FastTM2CopyIterator(const FastTM2CopyIterator& x) - : T_base(x) - { } - - FastTM2CopyIterator(const T_matrix& array) : T_base(array) { } - - using T_base::operator=; - void operator=(const FastTM2CopyIterator& x) - { - T_base::operator=(x); - } - - using T_base::operator(); - - // template - // T_range_result operator()(const RectDomain& d) const - // { - // return T_range_result(T_base::array_(d)); - // } - - // template - // FastTM2CopyIterator::T_slice::rank> - // operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const - // { - // typedef FastTM2CopyIterator::T_slice::rank> slice; - - // return slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); - // } -}; + using T_base::operator(); + // template + // T_range_result operator()(const RectDomain& d) const + // { + // return T_range_result(T_base::array_(d)); + // } + + // template + // FastTM2CopyIterator::T_slice::rank> + // operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const + // { + // typedef FastTM2CopyIterator::T_slice::rank> slice; + + // return slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); + // } + }; } diff --git a/blitz/tv2fastiter.h b/blitz/tv2fastiter.h index 482a6365..0ba7a832 100644 --- a/blitz/tv2fastiter.h +++ b/blitz/tv2fastiter.h @@ -8,7 +8,7 @@ * * This file is a part of Blitz. * - * Blitz is free software: you can redistribute it and/or modify + * Blitz is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. @@ -18,11 +18,11 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public + * You should have received a copy of the GNU Lesser General Public * License along with Blitz. If not, see . - * + * * Suggestions: blitz-devel@lists.sourceforge.net - * Bugs: blitz-support@lists.sourceforge.net + * Bugs: blitz-support@lists.sourceforge.net * * For more information, please see the Blitz++ Home Page: * https://sourceforge.net/projects/blitz/ @@ -34,9 +34,9 @@ #include #ifdef BZ_HAVE_STD - #include +#include #else - #include +#include #endif #include @@ -45,144 +45,171 @@ #include #include -namespace blitz { - -// forward declaration -template class FastTV2Iterator; -template class FastTV2CopyIterator; - - -template -class FastTV2IteratorBase { -public: - typedef P_numtype T_numtype; - typedef typename opType::T_optype T_optype; - // if T_numtype is POD, then T_result is T_numtype, but if T_numtype - // is an ET class, T_result will be the array class for that class. - typedef typename asET::T_wrapped T_typeprop; - typedef typename unwrapET::T_unwrapped T_result; - - /** Result type for fastRead_tv is a FastTVIterator. This is only - used for mixed TV/Array expressions. */ - typedef ETBase::vecWidth> > T_tvtypeprop; - typedef typename unwrapET::T_unwrapped T_tvresult; - - typedef TinyVector T_vector; - typedef FastTV2IteratorBase T_iterator; - typedef const T_vector& T_ctorArg1; - typedef int T_ctorArg2; // dummy - typedef FastTV2CopyIterator T_range_result; - - static const int - numArrayOperands = 0, - numTVOperands = 1, - numTMOperands = 0, +namespace blitz +{ + + // forward declaration + template + class FastTV2Iterator; + template + class FastTV2CopyIterator; + + template + class FastTV2IteratorBase + { + public: + typedef P_numtype T_numtype; + typedef typename opType::T_optype T_optype; + // if T_numtype is POD, then T_result is T_numtype, but if T_numtype + // is an ET class, T_result will be the array class for that class. + typedef typename asET::T_wrapped T_typeprop; + typedef typename unwrapET::T_unwrapped T_result; + + /** Result type for fastRead_tv is a FastTVIterator. This is only + used for mixed TV/Array expressions. */ + typedef ETBase::vecWidth>> + T_tvtypeprop; + typedef typename unwrapET::T_unwrapped T_tvresult; + + typedef TinyVector T_vector; + typedef FastTV2IteratorBase T_iterator; + typedef const T_vector &T_ctorArg1; + typedef int T_ctorArg2; // dummy + typedef FastTV2CopyIterator T_range_result; + + static const int + numArrayOperands = 0, + numTVOperands = 1, + numTMOperands = 0, numIndexPlaceholders = 0, - minWidth = simdTypes::vecWidth, - maxWidth = simdTypes::vecWidth, + minWidth = simdTypes::vecWidth, + maxWidth = simdTypes::vecWidth, rank_ = 1; - /** For an iterator, the vectorized result for width N is always a - TinyVector. */ - template struct tvresult { - typedef FastTV2Iterator Type; - }; + /** For an iterator, the vectorized result for width N is always a + TinyVector. */ + template + struct tvresult + { + typedef FastTV2Iterator Type; + }; - FastTV2IteratorBase(const T_iterator& x) + FastTV2IteratorBase(const T_iterator &x) : data_(x.data_), array_(x.array_) - { } + { + } - void operator=(const T_iterator& x) + void operator=(const T_iterator &x) { BZPRECONDITION(0); // doesn't this copy the data in x.array_ and then make data_ // point to x's array? doesn't seem right - array_ = x.array_; - data_ = x.data_; - stack_ = x.stack_; - //stride_ = x.stride_; + array_ = x.array_; + data_ = x.data_; + stack_ = x.stack_; + // stride_ = x.stride_; } - FastTV2IteratorBase(const T_vector& array) + FastTV2IteratorBase(const T_vector &array) : array_(array) { - data_ = array_.data(); + data_ = array_.data(); } ~FastTV2IteratorBase() - { } + { + } T_numtype operator()(int i) const - { return array_[i]; } + { + return array_[i]; + } - T_result operator()(TinyVector i) const + T_result operator()(TinyVector i) const { - BZPRECONDITION(array_.lengthCheck(i[0])); - return array_[i[0]]; + BZPRECONDITION(array_.lengthCheck(i[0])); + return array_[i[0]]; } int ascending(const int r) const { - BZPRECONDITION(r==0); + BZPRECONDITION(r == 0); return true; } int ordering(const int r) const { - BZPRECONDITION(r==0); + BZPRECONDITION(r == 0); return 0; } int lbound(const int r) const - { - BZPRECONDITION(r==0); + { + BZPRECONDITION(r == 0); return 0; } int ubound(const int r) const - { - BZPRECONDITION(r==0); - return N_length-1; + { + BZPRECONDITION(r == 0); + return N_length - 1; } - - //RectDomain domain() const { return array_.domain(); }; + + // RectDomain domain() const { return array_.domain(); }; T_result first_value() const { return *data_; } T_result operator*() const - { return *data_; } + { + return *data_; + } T_result operator[](int i) const - { return data_[i * stride_]; } + { + return data_[i * stride_]; + } T_result fastRead(diffType i) const - { return array_.fastRead(i); } + { + return array_.fastRead(i); + } - template - typename tvresult::Type fastRead_tv(diffType i) const - { - return typename tvresult::Type(*reinterpret_cast*>(&data_[i])); } + template + typename tvresult::Type fastRead_tv(diffType i) const + { + return typename tvresult::Type(*reinterpret_cast *>(&data_[i])); + } - /** Since data_ is simd aligned by construction, we just have - to check the offest. */ - bool isVectorAligned(diffType offset) const - { return (offset%simdTypes::vecWidth==0) ? true : false; } + /** Since data_ is simd aligned by construction, we just have + to check the offest. */ + bool isVectorAligned(diffType offset) const + { + return (offset % simdTypes::vecWidth == 0) ? true : false; + } int suggestStride(int r) const - { BZPRECONDITION(r==0); return stride_; } + { + BZPRECONDITION(r == 0); + return stride_; + } bool isStride(int r, diffType stride) const - { BZPRECONDITION(r==0); return stride==stride_; } + { + BZPRECONDITION(r == 0); + return stride == stride_; + } void push(int position) { - BZPRECONDITION(position==0); stack_[position] = data_; + BZPRECONDITION(position == 0); + stack_[position] = data_; } - + void pop(int position) - { - BZPRECONDITION(position==0); data_ = stack_[position]; + { + BZPRECONDITION(position == 0); + data_ = stack_[position]; } void advance() @@ -197,297 +224,316 @@ class FastTV2IteratorBase { void loadStride(int r) { - BZPRECONDITION(r==0); //stride_ = 1; + BZPRECONDITION(r == 0); // stride_ = 1; } - // This is used as lvalue, so it should return the actual data - const T_numtype * restrict data() const - { return data_; } + // This is used as lvalue, so it should return the actual data + const T_numtype *restrict data() const + { + return data_; + } - const T_vector& array() const - {return array_; } + const T_vector &array() const + { + return array_; + } - void _bz_setData(const T_numtype* ptr) - { BZPRECONDITION(0); //data_ = ptr; - } + void _bz_setData(const T_numtype *ptr) + { + BZPRECONDITION(0); // data_ = ptr; + } // this is needed for the stencil expression fastRead to work void _bz_offsetData(sizeType i) - { BZPRECONDITION(0); //data_ += i; - } + { + BZPRECONDITION(0); // data_ += i; + } // and these are needed for stencil expression shift to work void _bz_offsetData(sizeType offset, int dim) - { BZPRECONDITION(0); //data_ += offset*array_.stride(dim); - } - + { + BZPRECONDITION(0); // data_ += offset*array_.stride(dim); + } + void _bz_offsetData(sizeType offset1, int dim1, sizeType offset2, int dim2) - { BZPRECONDITION(0); //data_ += offset1*array_.stride(dim1); - //data_ += offset2*array_.stride(dim2); - } + { + BZPRECONDITION(0); // data_ += offset1*array_.stride(dim1); + // data_ += offset2*array_.stride(dim2); + } int stride() const - { return stride_; } + { + return stride_; + } bool isUnitStride(int r) const - { BZPRECONDITION(r==0); return stride_ == 1; } + { + BZPRECONDITION(r == 0); + return stride_ == 1; + } bool isUnitStride() const - { return stride_ == 1; } + { + return stride_ == 1; + } void advanceUnitStride() - { ++data_; } - - bool canCollapse(int outerLoopRank, int innerLoopRank) const - { - BZPRECONDITION(outerLoopRank==0); - BZPRECONDITION(innerLoopRank==0); - return true; - } - - template - bool shapeCheck(const T_shape& s) const - { return areShapesConformable(s, TinyVector(N_length)); } + { + ++data_; + } - /* - // Experimental - T_numtype& operator()(int i) const + bool canCollapse(int outerLoopRank, int innerLoopRank) const { - return (T_numtype&)data_[i*array_.stride(0)]; + BZPRECONDITION(outerLoopRank == 0); + BZPRECONDITION(innerLoopRank == 0); + return true; } - // Experimental - T_numtype& operator()(int i, int j) const + template + bool shapeCheck(const T_shape &s) const { - return (T_numtype&)data_[i*array_.stride(0) + j*array_.stride(1)]; + return areShapesConformable(s, TinyVector(N_length)); } - // Experimental - T_numtype& operator()(int i, int j, int k) const + /* + // Experimental + T_numtype& operator()(int i) const + { + return (T_numtype&)data_[i*array_.stride(0)]; + } + + // Experimental + T_numtype& operator()(int i, int j) const + { + return (T_numtype&)data_[i*array_.stride(0) + j*array_.stride(1)]; + } + + // Experimental + T_numtype& operator()(int i, int j, int k) const + { + return (T_numtype&)data_[i*array_.stride(0) + + j*array_.stride(1) + + k*array_.stride(2)]; + } + + // Experimental + + void moveTo(int i) + { + data_ = &const_cast(array_)(i); + } + + void moveTo(int i, int j) + { + data_ = &const_cast(array_)(i,j); + } + + void moveTo(int i, int j, int k) + { + data_ = &const_cast(array_)(i,j,k); + } + + template + void moveTo(const TinyVector& i) + { + data_ = &const_cast(array_)(i); + } + + // Experimental + void operator=(T_numtype x) + { *const_cast(data_) = x; } + + // Experimental + template + void operator=(T_value x) + { *const_cast(data_) = x; } + + // Experimental + template + void operator+=(T_value x) + { *const_cast(data_) += x; } + + // NEEDS_WORK: other operators + + // Experimental + operator T_numtype() const + { return *data_; } + */ + + // Experimental + T_result shift(int offset, int dim) const { - return (T_numtype&)data_[i*array_.stride(0) - + j*array_.stride(1) - + k*array_.stride(2)]; + return data_[offset * array_.stride(dim)]; } // Experimental - - void moveTo(int i) + T_result shift(int offset1, int dim1, int offset2, int dim2) const { - data_ = &const_cast(array_)(i); + return data_[offset1 * array_.stride(dim1) + offset2 * array_.stride(dim2)]; } - void moveTo(int i, int j) + void prettyPrint(std::string &str, + prettyPrintFormat &format) const { - data_ = &const_cast(array_)(i,j); + if (format.tersePrintingSelected()) + str += format.nextArrayOperandSymbol(); + else if (format.dumpArrayShapesMode()) + { +#ifdef BZ_HAVE_STD + std::ostringstream ostr; +#else + ostrstream ostr; +#endif + ostr << array_.shape(); + str += ostr.str(); + } + else + { + str += "TinyVector<"; + str += BZ_DEBUG_TEMPLATE_AS_STRING_LITERAL(T_numtype); + str += ","; + + char tmpBuf[TEMP_SIZE_BUFFER] = {0}; + snprintf(tmpBuf, TEMP_SIZE_BUFFER, "%d", N_length); + + str += tmpBuf; + str += ">"; + } } - void moveTo(int i, int j, int k) + // vectors can't be sliced + template + class SliceInfo + { + public: + typedef void T_slice; + }; + + protected: + const T_numtype *restrict data_; + P_arraytype array_; + ConstPointerStack stack_; + static const diffType stride_ = 1; + }; + + template + class FastTV2CopyIterator; + + template + class FastTV2Iterator : public FastTV2IteratorBase &> + { + public: + typedef FastTV2IteratorBase &> + T_base; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_vector T_vector; + typedef typename T_base::T_iterator T_iterator; + typedef typename T_base::T_ctorArg1 T_ctorArg1; + typedef typename T_base::T_ctorArg2 T_ctorArg2; + typedef typename T_base::T_range_result T_range_result; + + using T_base::numArrayOperands; + using T_base::numIndexPlaceholders; + using T_base::numTMOperands; + using T_base::numTVOperands; + using T_base::rank_; + + // NB: this ctor does NOT preserve stack and stride + // parameters. This is for speed purposes. + FastTV2Iterator(const FastTV2Iterator &x) + : T_base(x) { - data_ = &const_cast(array_)(i,j,k); } - template - void moveTo(const TinyVector& i) + FastTV2Iterator(const T_vector &array) : T_base(array) {} + + using T_base::operator=; + void operator=(const FastTV2Iterator &x) { - data_ = &const_cast(array_)(i); + T_base::operator=(x); } - // Experimental - void operator=(T_numtype x) - { *const_cast(data_) = x; } + using T_base::operator(); - // Experimental - template - void operator=(T_value x) - { *const_cast(data_) = x; } + // template + // T_range_result operator()(const RectDomain& d) const + // { + // return T_range_result(T_base::array_(d)); + // } - // Experimental - template - void operator+=(T_value x) - { *const_cast(data_) += x; } + // template + // FastTV2CopyIterator::T_slice::rank> + // operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const + // { + // typedef FastTV2CopyIterator::T_slice::rank> slice; - // NEEDS_WORK: other operators - - // Experimental - operator T_numtype() const - { return *data_; } - */ + // return slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); + // } + }; - // Experimental - T_result shift(int offset, int dim) const + /* This version of the FastTV2Iterator makes a COPY of the array + it's pointing to. This makes it possible to return expressions of + arrays that have gone out of scope, or to slice expressions. */ + template + class FastTV2CopyIterator : public FastTV2IteratorBase> + { + public: + typedef FastTV2IteratorBase> + T_base; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_vector T_vector; + typedef typename T_base::T_iterator T_iterator; + typedef typename T_base::T_ctorArg1 T_ctorArg1; + typedef typename T_base::T_ctorArg2 T_ctorArg2; + typedef typename T_base::T_range_result T_range_result; + + using T_base::numArrayOperands; + using T_base::numIndexPlaceholders; + using T_base::numTMOperands; + using T_base::numTVOperands; + using T_base::rank_; + + // NB: this ctor does NOT preserve stack and stride + // parameters. This is for speed purposes. + FastTV2CopyIterator(const FastTV2CopyIterator &x) + : T_base(x) { - return data_[offset*array_.stride(dim)]; } - // Experimental - T_result shift(int offset1, int dim1, int offset2, int dim2) const + FastTV2CopyIterator(const T_vector &array) : T_base(array) {} + + using T_base::operator=; + void operator=(const FastTV2CopyIterator &x) { - return data_[offset1*array_.stride(dim1) - + offset2*array_.stride(dim2)]; + T_base::operator=(x); } - void prettyPrint(std::string &str, - prettyPrintFormat& format) const - { - if (format.tersePrintingSelected()) - str += format.nextArrayOperandSymbol(); - else if (format.dumpArrayShapesMode()) - { -#ifdef BZ_HAVE_STD - std::ostringstream ostr; -#else - ostrstream ostr; -#endif - ostr << array_.shape(); - str += ostr.str(); - } - else { - str += "TinyVector<"; - str += BZ_DEBUG_TEMPLATE_AS_STRING_LITERAL(T_numtype); - str += ","; - - char tmpBuf[10]; - sprintf(tmpBuf, "%d", N_length); - - str += tmpBuf; - str += ">"; - } - } - - // vectors can't be sliced - template - class SliceInfo { - public: - typedef void T_slice; -}; - -protected: - const T_numtype * restrict data_; - P_arraytype array_; - ConstPointerStack stack_; - static const diffType stride_=1; -}; - - -template class FastTV2CopyIterator; - -template -class FastTV2Iterator : - public FastTV2IteratorBase&> -{ -public: - typedef FastTV2IteratorBase&> T_base; - typedef typename T_base::T_numtype T_numtype; - typedef typename T_base::T_vector T_vector; - typedef typename T_base::T_iterator T_iterator; - typedef typename T_base::T_ctorArg1 T_ctorArg1; - typedef typename T_base::T_ctorArg2 T_ctorArg2; - typedef typename T_base::T_range_result T_range_result; - - using T_base::rank_; - using T_base::numArrayOperands; - using T_base::numTVOperands; - using T_base::numTMOperands; - using T_base::numIndexPlaceholders; - - // NB: this ctor does NOT preserve stack and stride - // parameters. This is for speed purposes. - FastTV2Iterator(const FastTV2Iterator& x) - : T_base(x) - { } - - FastTV2Iterator(const T_vector& array) : T_base(array) {} - - using T_base::operator=; - void operator=(const FastTV2Iterator& x) - { - T_base::operator=(x); - } - - using T_base::operator(); - - // template - // T_range_result operator()(const RectDomain& d) const - // { - // return T_range_result(T_base::array_(d)); - // } - - // template - // FastTV2CopyIterator::T_slice::rank> - // operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const - // { - // typedef FastTV2CopyIterator::T_slice::rank> slice; - - // return slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); - // } - -}; - -/* This version of the FastTV2Iterator makes a COPY of the array - it's pointing to. This makes it possible to return expressions of - arrays that have gone out of scope, or to slice expressions. */ -template -class FastTV2CopyIterator : - public FastTV2IteratorBase > -{ -public: - typedef FastTV2IteratorBase > T_base; - typedef typename T_base::T_numtype T_numtype; - typedef typename T_base::T_vector T_vector; - typedef typename T_base::T_iterator T_iterator; - typedef typename T_base::T_ctorArg1 T_ctorArg1; - typedef typename T_base::T_ctorArg2 T_ctorArg2; - typedef typename T_base::T_range_result T_range_result; - - using T_base::rank_; - using T_base::numArrayOperands; - using T_base::numTVOperands; - using T_base::numTMOperands; - using T_base::numIndexPlaceholders; - - - // NB: this ctor does NOT preserve stack and stride - // parameters. This is for speed purposes. - FastTV2CopyIterator(const FastTV2CopyIterator& x) - : T_base(x) - { } - - FastTV2CopyIterator(const T_vector& array) : T_base(array) { } - - using T_base::operator=; - void operator=(const FastTV2CopyIterator& x) - { - T_base::operator=(x); - } - - using T_base::operator(); - - // template - // T_range_result operator()(const RectDomain& d) const - // { - // return T_range_result(T_base::array_(d)); - // } - - // template - // FastTV2CopyIterator::T_slice::rank> - // operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const - // { - // typedef FastTV2CopyIterator::T_slice::rank> slice; - - // return slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); - // } -}; + using T_base::operator(); + + // template + // T_range_result operator()(const RectDomain& d) const + // { + // return T_range_result(T_base::array_(d)); + // } + // template + // FastTV2CopyIterator::T_slice::rank> + // operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const + // { + // typedef FastTV2CopyIterator::T_slice::rank> slice; + + // return slice(array_(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)); + // } + }; }