A lot of improvements to sparse mode:

- Macro (flag) ADDA_SPARSE was shortened to SPARSE.
- Makefiles were modified to produce info about SPARSE and USE_SSE3 and to set up interaction (errors) with other options.
- Gather operations of sparse mode are now performed through AllGather(). For that, it was modified to enable in-place operation, and a new MPI_Datatype mpi_int3 was introduced. Previously used functions and variables have been removed.
- In sparse mode local_nvoid_d0 and local_nvoid_d1 are now initialized in ParSetup(), while SetupLocalD() is not called at all.
- Memory optimizations: material_full and remnants of DipoleCoord_full are eliminated; 'position' is no longer allocated but just points to a part of position_full.
- A lot of indexes in sparse_ops.h are now size_t instead of int. Among other changes, this fixes issue 159.
- Fixed initialization of mat_count in MakeParticle() in sparse mode. It was broken and led [remainder of this sentence is missing].
- Timings for init Dmatrix and FFT setup are no longer shown in sparse mode.
- All command line options not supported in sparse mode are now explicitly removed during compilation (with appropriate modification of help texts). These include: '-shape ...' (except read), '-init_field wkb', -granul, -save_geom, -sg_format, -store_grans.
- Removed error messages when using '-int ...' (except poi) in sparse mode. Now these options work fine, but relatively slowly.
- Information about compile options SPARSE and USE_SSE3 (when used) has been added to output of option -V. Added information about sparse mode to log file (instead of information about FFT method).
- Added meaningful error message for default run of adda (without command line parameters) in sparse mode.
- A lot of formatting changes to make the sparse mode similar to the rest of the code.

Other changes:
- interaction.c/h now holds all the code related to calculation of Green's tensor. In particular, reading/freeing of table integrals was moved from calculator.c, and duplicate code to compute Green's tensor in fft.c was removed.
An important change is that functions that compute Green's tensor no longer test for a zero argument. Hence, this test is now made in InitDmatrix().
- Intermediate vector DipoleCoord_tmp is no longer used in make_particle.c. This somewhat decreases memory usage during prognosis. Overall, the code in MakeParticle() was significantly changed (a lot of moving parts around).
- oclcore.h is now always included, but is empty if OPENCL is not specified. This is similar to other conditional headers like fft.h.
- Tests (tests/2exec) were modified to incorporate sparse mode (to compare two sparse mode versions or sparse mode vs. FFT mode). Corresponding tests are listed in a separate file, suite_sparse, which is automatically used if the appropriate flag is uncommented in comp2exec. Added 2 new shape files and modified ellipsoid.geom. Improved behavior of diff_numeric.awk: before, it was skipping differences if the line was missing in the first file.
- A number of files (new or heavily modified) were formatted for a window width of 120 symbols.
- A number of minor syntax changes to remove warnings by Eclipse code analyzer. In particular, changed several empty statements from ";" to "{}", removed two unused functions, and hid several extern declarations in sparse mode.
- Fixed crashes when using command line options '-shape' and '-beam' without arguments. Now meaningful error messages are produced.
- Version incremented to 1.2b2.

The updated code was tested to compile in different compilation modes (almost all possible combinations) and tested by tests/2exec/ against the version 1.1.
adda-team · Feb 7, 2013 · 9cd261e · 9cd261e
1 parent 62a90bc
commit 9cd261e
Show file tree

Hide file tree

Showing 31 changed files with 1,661 additions and 1,825 deletions.
diff --git a/src/CalculateE.c b/src/CalculateE.c
@@ -5,7 +5,7 @@
  *        Routines for most scattering quantities are in crosssec.c. Also saves internal fields to
  *        file (optional).
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -60,13 +60,6 @@ extern size_t TotalEFieldPlane;
 // used in iterative.c
 TIME_TYPE tstart_CE;
 
-// EXTERNAL FUNCTIONS
-
-// GenerateB.c
-void GenerateB(enum incpol which,doublecomplex *x);
-// iterative.c
-int IterativeSolver(enum iter method);
-
 // LOCAL VARIABLES
 
 #define MUEL_HEADER "s11 s12 s13 s14 s21 s22 s23 s24 s31 s32 s33 s34 s41 s42 s43 s44"
@@ -80,6 +73,13 @@ int IterativeSolver(enum iter method);
 #define ANGLE_FORMAT "%.2f"
 #define RMSE_FORMAT "%.3E"
 
+// EXTERNAL FUNCTIONS
+
+// GenerateB.c
+void GenerateB(enum incpol which,doublecomplex *x);
+// iterative.c
+int IterativeSolver(enum iter method);
+
 //============================================================
 
 static void ComputeMuellerMatrix(double matrix[4][4], const doublecomplex s1,const doublecomplex s2,
@@ -111,37 +111,6 @@ static void ComputeMuellerMatrix(double matrix[4][4], const doublecomplex s1,con
 
 //============================================================
 
-static void ATT_UNUSED ComputeMuellerMatrixNorm(double matrix[4][4],const doublecomplex s1,
-	const doublecomplex s2,const doublecomplex s3,const doublecomplex s4)
-/* computer mueller matrix from scattering matrix elements s1, s2, s3, s4, according to formula
- * 3.16 from Bohren and Huffman; normalize all elements to S11 (except itself)
- */
-{
-	matrix[0][0] = 0.5*(cMultConRe(s1,s1)+cMultConRe(s2,s2)+cMultConRe(s3,s3)+cMultConRe(s4,s4));
-	matrix[0][1] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s4,s4)-cMultConRe(s3,s3))
-	             / matrix[0][0];
-	matrix[0][2] = (cMultConRe(s2,s3)+cMultConRe(s1,s4))/matrix[0][0];
-	matrix[0][3] = (cMultConIm(s2,s3)-cMultConIm(s1,s4))/matrix[0][0];
-
-	matrix[1][0] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s3,s3)-cMultConRe(s4,s4))
-	             / matrix[0][0];
-	matrix[1][1] = 0.5*(cMultConRe(s2,s2)+cMultConRe(s1,s1)-cMultConRe(s3,s3)-cMultConRe(s4,s4))
-	             / matrix[0][0];
-	matrix[1][2] = (cMultConRe(s2,s3)-cMultConRe(s1,s4))/matrix[0][0];
-	matrix[1][3] = (cMultConIm(s2,s3)+cMultConIm(s1,s4))/matrix[0][0];
-
-	matrix[2][0] = (cMultConRe(s2,s4)+cMultConRe(s1,s3))/matrix[0][0];
-	matrix[2][1] = (cMultConRe(s2,s4)-cMultConRe(s1,s3))/matrix[0][0];
-	matrix[2][2] = (cMultConRe(s1,s2)+cMultConRe(s3,s4))/matrix[0][0];
-	matrix[2][3] = (cMultConIm(s2,s1)+cMultConIm(s4,s3))/matrix[0][0];
-
-	matrix[3][0] = (cMultConIm(s4,s2)+cMultConIm(s1,s3))/matrix[0][0];
-	matrix[3][1] = (cMultConIm(s4,s2)-cMultConIm(s1,s3))/matrix[0][0];
-	matrix[3][2] = (cMultConIm(s1,s2)-cMultConIm(s3,s4))/matrix[0][0];
-	matrix[3][3] = (cMultConRe(s1,s2)-cMultConRe(s3,s4))/matrix[0][0];
-}
-
-//==============================================================
 INLINE void InitMuellerIntegrFile(const int type,const char * restrict fname,FILE * restrict * file,
 	char * restrict buf,const size_t buf_size,double * restrict *mult)
 /* If 'phi_int_type' matches 'type', appropriate file (name given by 'fname') is created (with

diff --git a/src/Makefile b/src/Makefile
@@ -79,7 +79,7 @@ ifneq ($(if $(MAKECMDGOALS),$(if $(filter $(NONTRIVIAL),$(MAKECMDGOALS)),1,),1),
 # below are appended to the list specified elsewhere.
 # Full list of possible options is the following:
 VALID_OPTS := DEBUG DEBUGFULL FFT_TEMPERTON PRECISE_TIMING NOT_USE_LOCK ONLY_LOCKFILE NO_FORTRAN \
-              NO_CPP OVERRIDE_STDC_TEST OCL_READ_SOURCE_RUNTIME CLFFT_APPLE SPARSE
+              NO_CPP OVERRIDE_STDC_TEST OCL_READ_SOURCE_RUNTIME CLFFT_APPLE SPARSE USE_SSE3
 
 # Debug mode. By default, release configuration is used (no debug, no warnings, maximum
 # optimization). DEBUG turns on producing debugging symbols (-g) and warnings and brings
@@ -224,22 +224,42 @@ else
   $(info Release mode)
   DBGLVL := 0
 endif
-ifneq ($(filter FFT_TEMPERTON,$(OPTIONS)),)
-  $(info Temperton FFT)
-  CDEFS += -DFFT_TEMPERTON
-  ifeq ($(filter NO_FORTRAN,$(OPTIONS)),)
-    FSOURCE += cfft99D.f
-  else
-    $(error Temperton FFT (FFT_TEMPERTON) is implemented in Fortran, hence incompatible with NO_FORTRAN)
+ifneq ($(filter SPARSE,$(OPTIONS)),)
+  $(info Sparse (non-FFT) mode)
+  ifneq ($(filter FFT_TEMPERTON,$(OPTIONS)),)
+    $(error SPARSE turns off all FFT-related code, so it is incompatible with FFT_TEMPERTON)
+  endif
+  ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),)
+    $(error SPARSE turns off all FFT-related code, so it is incompatible with CLFFT_APPLE)
+  endif
+  ifneq ($(filter PRECISE_TIMING,$(OPTIONS)),)
+    $(error SPARSE is currently incompatible with PRECISE_TIMING)
   endif
+
+  CDEFS += -DSPARSE
 else
-  $(info FFTW3)
-  LDLIBS += -lfftw3
-  ifdef FFTW3_INC_PATH
-    CFLAGS += -I$(FFTW3_INC_PATH)
+  CSOURCE += fft.c
+  ifneq ($(filter FFT_TEMPERTON,$(OPTIONS)),)
+    $(info Temperton FFT)
+    CDEFS += -DFFT_TEMPERTON
+    ifeq ($(filter NO_FORTRAN,$(OPTIONS)),)
+      FSOURCE += cfft99D.f
+    else
+      $(error Temperton FFT (FFT_TEMPERTON) is implemented in Fortran, hence incompatible with NO_FORTRAN)
+    endif
+  else
+    $(info FFTW3)
+    LDLIBS += -lfftw3
+    ifdef FFTW3_INC_PATH
+      CFLAGS += -I$(FFTW3_INC_PATH)
+    endif
+    ifdef FFTW3_LIB_PATH
+      LDFLAGS += -L$(FFTW3_LIB_PATH)
+    endif
   endif
-  ifdef FFTW3_LIB_PATH
-    LDFLAGS += -L$(FFTW3_LIB_PATH)
+  ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),)
+    # Here only the info is printed, the main logic is in ocl/Makefile
+    $(info Apple clFFT routines)
   endif
 endif
 ifneq ($(filter PRECISE_TIMING,$(OPTIONS)),)
@@ -270,24 +290,15 @@ ifneq ($(filter OVERRIDE_STDC_TEST,$(OPTIONS)),)
   $(info Overriding test for C99 conformance)
   CDEFS += -DOVERRIDE_STDC_TEST
 endif
-
-ifneq ($(filter SPARSE,$(OPTIONS)),)
-  CDEFS += -DADDA_SPARSE
-else
-  CSOURCE += fft.c
-endif
 ifneq ($(filter USE_SSE3,$(OPTIONS)),)
   CDEFS += -DUSE_SSE3
   CFLAGS += -msse3
+  $(info Using SSE3 optimizations)
 endif
 ifneq ($(filter OCL_READ_SOURCE_RUNTIME,$(OPTIONS)),)
   # Here only the info is printed, the main logic is in ocl/Makefile
   $(info Read CL sources at runtime)
 endif
-ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),)
-  # Here only the info is printed, the main logic is in ocl/Makefile
-  $(info Apple clFFT routines)
-endif
 # Process EXTRA_FLAGS
 ifneq ($(strip $(EXTRA_FLAGS)),)
   $(info Extra compiler options: '$(EXTRA_FLAGS)')
@@ -446,12 +457,12 @@ clean: cleanseq cleanmpi cleanocl
 # compilation and thus contain quite heavy processing.
 cleanseq:
 	@echo "Removing sequential compiled files"
-	cd $(SEQ) && rm -f *.o *.d *.d.* $(OPTSFILES) $(PROGSEQ) $(PROGSEQ).exe
+	cd $(SEQ) && rm -f *.o *.d $(OPTSFILES) $(PROGSEQ) $(PROGSEQ).exe
 
 cleanmpi:
 	@echo "Removing MPI compiled files"
-	cd $(MPI) && rm -f *.o *.d *.d.* $(OPTSFILES) $(PROGMPI) $(PROGMPI).exe
+	cd $(MPI) && rm -f *.o *.d $(OPTSFILES) $(PROGMPI) $(PROGMPI).exe
 
 cleanocl:
 	@echo "Removing OpenCL compiled files"
-	cd $(OCL) && rm -f *.o *.d *.d.* $(OPTSFILES) $(PROGOCL) $(PROGOCL).exe *.clstr
+	cd $(OCL) && rm -f *.o *.d $(OPTSFILES) $(PROGOCL) $(PROGOCL).exe *.clstr