Finished the changes towards complex datatypes from C99. Fixes issue 70.

Added defines to simplify adding pragmas to ignore warnings in gcc and loop_count in icc. The ignore pragmas are used in several places to suppress warnings about conversion from doublecomplex* to complex*: - calls to Temperton FFT routines. - copying data in SSE3 optimizations (here explicit pointer casts were also added). Added comments in several places concerning the portability of such conversions. icc pragmas are now visible only when compiling with icc. Inclusion of clAmdFft.h is now enclosed in pragmas to ignore existent warning. Calling of IGT Fortran routines was slightly changed, conversion from double to complex has been moved inside the C file from Fortran routine. tests/2exec/comp2exec was improved by adding a common reference path (to search all reference binaries). Added '-pol lak' to suite.
adda-team · Aug 1, 2013 · 653fbf5 · 653fbf5
1 parent 057298c
commit 653fbf5
Show file tree

Hide file tree

Showing 13 changed files with 143 additions and 74 deletions.
diff --git a/src/Makefile b/src/Makefile
@@ -329,8 +329,7 @@ ifeq ($(COMPILER),gnu)
   COPT1   := -O2
   COPT2   := -O3 -ffast-math -funroll-loops
   CWARN   := -Wall -Wextra -Wcast-qual -Wpointer-arith -Wwrite-strings -Wstrict-prototypes \
-             -Wstrict-aliasing=1 -Wshadow -Wcast-align -Wnested-externs -Wcomment -Wno-unknown-pragmas \
-             -Wno-overlength-strings
+             -Wstrict-aliasing=1 -Wshadow -Wcast-align -Wnested-externs -Wcomment -Wno-overlength-strings
   # gcc versions prior to 4.7.2 are affected by bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=7263 , which causes
   # -pedantic flag to generates warnings on every occurence of I (complex i)
   GCC_GTEQ_472 := $(shell expr `gcc -dumpversion | sed -e 's/\.\([0-9][0-9]\)/\1/g' -e 's/\.\([0-9]\)/0\1/g' \

diff --git a/src/comm.c b/src/comm.c
@@ -154,9 +154,8 @@ static MPI_Datatype MPIVarType(var_type type,bool reduce,int *mult)
  * directly used. In this case we emulate more complex datatypes through multiplication of double, and additional
  * variable 'mult' is returned to account for this factor.
  *
- * Reduction of complex numbers is emulated if not supported; the emulation is not perfectly portable - depends on a
- * particular implementation of complex numbers. The good thing is that it is only used for old (less modern) MPI
- * implementations.
+ * Reduction of complex numbers is emulated if not supported; C99 implies that this emulation is portable. Anyway, it
+ * is only used for old (less modern) MPI implementations.
  */
 {
 	if (reduce) *mult=1; // default value when direct correspondence is possible

diff --git a/src/fft.c b/src/fft.c
@@ -34,7 +34,9 @@
 #include <string.h>
 
 #ifdef CLFFT_AMD
+	IGNORE_WARNING(-Wstrict-prototypes) // no way to change the library header
 #	include <clAmdFft.h> //external library from AMD
+	STOP_IGNORE
 	// Defines precision of clAmdFft transforms. !!! CLFFT_DOUBLE_FAST should be tested when becomes operational
 #	define PRECISION_CLFFT CLFFT_DOUBLE
 #elif defined(CLFFT_APPLE)
@@ -318,8 +320,15 @@ void fftX(const int isign)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridX,inc=1,jump=nn,lot=boxY;
 	size_t z;
-
+	/* Calls to Temperton FFT cause warnings for translation from doublecomplex to double pointers. However, such a cast
+	 * is perfectly valid in C99. So we set pragmas to remove these warnings.
+	 *
+	 * !!! TODO: Another (ultimate) solution is to remove this routine altogether, since FFTW is perfect in all
+	 * respects. This is also reasonable considering future switch to tgmath.h
+	 */
+	IGNORE_WARNING(-Wstrict-aliasing);
 	for (z=0;z<3*local_Nz;z++) cfft99_((double *)(Xmatrix+z*gridX*smallY),work,trigsX,ifaxX,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -343,7 +352,9 @@ void fftY(const int isign)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridY,inc=1,jump=nn,lot=3*gridZ;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	cfft99_((double *)(slices_tr),work,trigsY,ifaxY,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -367,7 +378,9 @@ void fftZ(const int isign)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridZ,inc=1,jump=nn,lot=boxY,Xcomp;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	for (Xcomp=0;Xcomp<3;Xcomp++) cfft99_((double *)(slices+gridYZ*Xcomp),work,trigsZ,ifaxZ,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -382,7 +395,9 @@ static void fftX_Dm(const size_t lengthZ ONLY_FOR_TEMPERTON)
 	int nn=gridX,inc=1,jump=nn,lot=D2sizeY,isign=FFT_FORWARD;
 	size_t z;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	for (z=0;z<lengthZ;z++) cfft99_((double *)(D2matrix+z*gridX*D2sizeY),work,trigsX,ifaxX,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -396,7 +411,9 @@ static void fftY_Dm(void)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridY,inc=1,jump=nn,lot=gridZ,isign=FFT_FORWARD;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	cfft99_((double *)slice_tr,work,trigsY,ifaxY,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -410,7 +427,9 @@ static void fftZ_Dm(void)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridZ,inc=1,jump=nn,lot=gridY,isign=FFT_FORWARD;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	cfft99_((double *)slice,work,trigsZ,ifaxZ,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -531,6 +550,9 @@ static void fftInitAfterD(void)
  * completely separate code is used for OpenCL and FFTW3, because even precise-timing output is significantly different.
  * In particular, FFTW3 uses separate plans for forward and backward, while clFFT (by Apple or AMD) uses one plan for
  * both directions.
+ *
+ * clFft access the OpenCL buffers directly, so they are not anyhow affected by the definition of complex numbers in the
+ * main part of the code (although, it is consistent with it)
  */
 {
 #ifdef OPENCL
@@ -777,7 +799,7 @@ void InitDmatrix(void)
 	CREATE_CL_BUFFER(bufslices,CL_MEM_READ_WRITE,gridYZ*3*sizeof(doublecomplex),NULL);
 	CREATE_CL_BUFFER(bufslices_tr,CL_MEM_READ_WRITE,gridYZ*3*sizeof(doublecomplex),NULL);
 	/* The following are constant device buffers which are initialized with host data. But bufDmatrix is initialized in
-	 * the end of this function (to be compatible with prognosis. And bufcc_sqrt is initialized in InitCC, since it may
+	 * the end of this function (to be compatible with prognosis). And bufcc_sqrt is initialized in InitCC, since it may
 	 * change for every run of the iterative solver.
 	 */
 	CREATE_CL_BUFFER(bufcc_sqrt,CL_MEM_READ_ONLY,sizeof(cc_sqrt),NULL);

diff --git a/src/fort/propaesplibreintadda.f b/src/fort/propaesplibreintadda.f
@@ -21,12 +21,14 @@ subroutine propaespacelibreintadda(Rij,k0a,arretecube,relreq,
       double precision k0a,arretecubem
       double precision x,y,z,arretecube,k0,xx0,yy0,zz0
       double precision Rij(3),result(12)
+c     The structure of the result is the following:
+c     Re(G11),Re(G12),Re(G13),Re(G22),Re(G23),Re(G33),Im(G11),...,Im(G33)
 
 c     Variables needs for the integration
       integer  KEY, N, NF, NDIM, MINCLS, MAXCLS, IFAIL, NEVAL, NW
       parameter (nw=4000000,ndim=3,nf=12)
       double precision A(NDIM), B(NDIM), WRKSTR(NW)
-      double precision  ABSEST(NF), FINEST(NF), ABSREQ, RELREQ,err
+      double precision  ABSEST(NF), ABSREQ, RELREQ,err
 
       double precision Id(3,3),Rab,Rvect(3)
 
@@ -68,21 +70,16 @@ subroutine propaespacelibreintadda(Rij,k0a,arretecube,relreq,
       endif
 
       call  DCUHRE(NDIM,NF,A,B, MINCLS, MAXCLS, fonctionigtadda,
-     $      ABSREQ,RELREQ,KEY,NW,0,finest,ABSEST,NEVAL,IFAIL, WRKSTR) 
+     $      ABSREQ,RELREQ,KEY,NW,0,result,ABSEST,NEVAL,IFAIL, WRKSTR) 
 
       do N = 1,NF
-         FINEST(N)=FINEST(N)/arretecube/arretecube/arretecube
+         result(N)=result(N)/arretecube/arretecube/arretecube
       enddo
 
       if (ifail.ne.0) then
          write(*,*) 'IFAIL in IGT routine',IFAIL
       endif
 
-      do i = 1,6
-        result(2*i-1)=finest(i)
-        result(2*i)=finest(i+6)
-      enddo
-
       end
 c*************************************************************
       subroutine fonctionigtadda(ndim,zz,nfun,f)

diff --git a/src/function.h b/src/function.h
@@ -1,6 +1,6 @@
 /* File: function.h
  * $Date::                            $
- * Descr: function attributes
+ * Descr: function attributes and compiler pragmas
  *
  * Copyright (C) 2006,2008,2010-2011,2013 ADDA contributors
  * This file is part of ADDA.
@@ -17,7 +17,6 @@
 #ifndef __function_h
 #define __function_h
 
-
 // attribute options for GCC compilers (Intel compiler may also recognize them)
 #ifdef __GNUC__
 	// sets a macro for testing GCC version (copied from _mingw.h)
@@ -26,6 +25,16 @@
 			(__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
 #	else
 #		define GCC_PREREQ(major, minor)  0
+#	endif
+	// pragmas to ignore warnings
+#	if GCC_PREREQ(4,6)
+#		define DO_PRAGMA(x) _Pragma (#x)
+#		define IGNORE_WARNING(x) DO_PRAGMA(GCC diagnostic ignored #x)
+#		// assume that push is not used anywhere
+#		define STOP_IGNORE       _Pragma ("GCC diagnostic pop")
+#	else
+#		define IGNORE_WARNING(x)
+#		define STOP_IGNORE
 #	endif
 	// The following chooses between __printf__ and __gnu_printf__ attributes
 #	if GCC_PREREQ(4,4)
@@ -49,11 +58,19 @@
 #	define ATT_NORETURN __attribute__ ((__noreturn__))
 #	define ATT_UNUSED   __attribute__ ((__unused__))
 #else
+#	define IGNORE_WARNING(x)
+#	define STOP_IGNORE
 #	define ATT_PRINTF(a,b)
 #	define ATT_PURE
 #	define ATT_MALLOC
 #	define ATT_NORETURN
 #	define ATT_UNUSED
 #endif
 
+#ifdef __ICC
+#	define LARGE_LOOP _Pragma ("loop_count (10000)")
+#else
+#	define LARGE_LOOP
+#endif
+
 #endif // __function_h
diff --git a/src/interaction.c b/src/interaction.c
@@ -138,8 +138,18 @@ static inline __m128d accImExp_pd(const double x)
 
 static inline doublecomplex accImExp(const double x)
 {
+	/* Here and further in the SSE3 part it is assumed that doublecomplex is equivalent to two doubles (that is
+	 * specified by the C99 standard). Explicit pointer casts have been put in place, and pragmas to ignore remaining
+	 * warnings from strict aliasing.
+	 *
+	 * !!! TODO: SSE3 code is a nice hack. But it should be considered carefully - is it worth it? In particular, it
+	 * seems that only parts of it are really beneficial (like tabulated evaluation of imaginary exponents), and those
+	 * can be incorporated into the main code (using standard C99 only).
+	 */
 	doublecomplex c;
-	_mm_store_pd(&c,accImExp_pd(x));
+	IGNORE_WARNING(-Wstrict-aliasing);
+	_mm_store_pd((double *)(&c),accImExp_pd(x));
+	STOP_IGNORE;
 	return c;
 }
 
@@ -155,26 +165,30 @@ static void CalcInterTerm_core(const double kr,const double kr2,const double inv
 	const __m128d v1 = _mm_set_pd(kr,t3);
 	const __m128d v2 = _mm_set_pd(t2,t1);
 	__m128d qff,im_re;
-	_mm_store_pd(expval,sc);
+	IGNORE_WARNING(-Wstrict-aliasing);
+	_mm_store_pd((double *)expval,sc);
+	STOP_IGNORE;
 
 #undef INTERACT_MUL
 #define INTERACT_DIAG(ind) { \
 	qff = _mm_set1_pd(qmunu[ind]); \
 	im_re = _mm_add_pd(v1,_mm_mul_pd(v2,qff)); \
 	im_re = cmul(sc,im_re); \
-	_mm_store_pd(result+ind,im_re); }
+	_mm_store_pd((double *)(result+ind),im_re); }
 #define INTERACT_NONDIAG(ind) { \
 	qff = _mm_set1_pd(qmunu[ind]); \
 	im_re = _mm_mul_pd(v2,qff); \
 	im_re = cmul(sc,im_re); \
-	_mm_store_pd(result+ind,im_re); }
+	_mm_store_pd((double *)(result+ind),im_re); }
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	INTERACT_DIAG(0);    // xx
 	INTERACT_NONDIAG(1); // xy
 	INTERACT_NONDIAG(2); // xz
 	INTERACT_DIAG(3);    // yy
 	INTERACT_NONDIAG(4); // yz
 	INTERACT_DIAG(5);    // zz
+	STOP_IGNORE;
 
 #undef INTERACT_DIAG
 #undef INTERACT_NONDIAG
@@ -716,12 +730,18 @@ void CalcInterTerm_igt(const int i,const int j,const int k,doublecomplex result[
 	double qvec[3],qmunu[6]; // unit directional vector {qx,qy,qz} and its outer-product {qxx,qxy,qxz,qyy,qyz,qzz}
 	double rn,invrn,invr3,kr,kr2; // |R/d|, 1/|R/d|, |R|^-3, kR, (kR)^2
 	doublecomplex expval; // exp(ikR)/|R|^3
-	double rtemp[3];
+	double rtemp[3],tmp[12];
+	int comp;
 
 	CalcInterParams1(i,j,k,qvec,&rn);
 	if (igt_lim==UNDEF || rn<=igt_lim) {
 		vMultScal(gridspace,qvec,rtemp);
-		propaespacelibreintadda_(rtemp,&WaveNum,&gridspace,&igt_eps,(double *)result);
+		/* passing complex vectors from Fortran to c is not necessarily portable (at least requires extra effort in
+		 * the Fortran code. So we do it through double. This is not bad for performance, since double is anyway used
+		 * internally for integration in this Fortran routine.
+		 */
+		propaespacelibreintadda_(rtemp,&WaveNum,&gridspace,&igt_eps,tmp);
+		for (comp=0;comp<6;comp++) result[comp] = tmp[comp] + I*tmp[comp+6];
 	}
 	else {
 		// The following is equivalent to CalcInterTerm_poi, except for the 1st part of initialization performed above
@@ -830,7 +850,7 @@ void InitInteraction(void)
 #ifndef NO_FORTRAN
 		case G_IGT: CalcInterTerm = &CalcInterTerm_igt; break;
 #endif
-		default: LogError(ONE_POS, "Invalid interaction term calculation method: %d",IntRelation);
+		default: LogError(ONE_POS, "Invalid interaction term calculation method: %d",(int)IntRelation);
 			// no break
 	}
 	// read tables if needed

diff --git a/src/iterative.c b/src/iterative.c
@@ -92,7 +92,7 @@ struct iter_params_struct {
 	int vec_N;        // number of additional vectors to describe the state
 	void (*func)(const enum phase); // pointer to implementation of the iterative solver
 };
-static doublecomplex dumb; // dumb variable, used in workaround for issue 146
+static doublecomplex dumb ATT_UNUSED; // dumb variable, used in workaround for issue 146
 
 #define ITER_FUNC(name) static void name(const enum phase ph)
 
@@ -340,6 +340,10 @@ ITER_FUNC(BCGS2)
  * so we use l=2 here. In many cases one iteration of this method is similar to two iterations of BiCGStab, but overall
  * convergence is slightly better.
  * Breakdown tests were made to coincide with that for BiCGStab for l=1.
+ *
+ * !!! This iterative solver produces segmentation fault when compiled with icc 11.1. Probably that is related to
+ * issue 146. But we leave it be (assume that this is a compiler bug). Even if someone uses this compiler, he can
+ * live fine without this iterative solver.
  */
 {
 #define LL 2 // potentially the method will also work for l=1 (but memory allocation and freeing need to be adjusted)