diff --git a/doc/README b/doc/README index 57a496e1..c6de57df 100644 --- a/doc/README +++ b/doc/README @@ -1,3 +1,25 @@ + "Amsterdam DDA" + + Maxim A. Yurkin(1,2) and Alfons G. Hoekstra(1) + + (1) Faculty of Science, Section Computational Science, + of the University of Amsterdam, + Kruislaan 403, 1098 SJ, Amsterdam, The Netherlands, + tel: +31-20-525-7530, fax: +31-20-525-7490 + + (2) Institute of Chemical Kinetics and Combustion, + Siberian Branch of the Russian Academy of Sciences, + Institutskaya 3, Novosibirsk, 630090, Russia, + tel: +7-383-333-3240, fax: +7-383-334-2350 + + email: adda@science.uva.nl + + last revised: 8 April 2008 + + Copyright (C) 2006-2008 University of Amsterdam + This software package is covered by the GNU General Public License. + + ## ##### ## ##### ## ## /#### /##### /## /##### /## /#### / ### // / / ### // / / ### / ### @@ -17,31 +39,6 @@ ## ## ## ## - - Maxim A. Yurkin - - Institute of Chemical Kinetics and Combustion, - Siberian Branch of the Russian Academy of Sciences, - Institutskaya 3, Novosibirsk, 630090, Russia, - tel: +7-383-333-3240, fax: +7-383-334-2350 - - Alfons G. Hoekstra - - Faculty of Science, Section Computational Science, - of the University of Amsterdam, - Kruislaan 403, 1098 SJ, Amsterdam, The Netherlands, - tel: +31-20-525-7530, fax: +31-20-525-7490 - - - email: adda@science.uva.nl - - $Date:: $ - - Copyright (C) 2006-2008 University of Amsterdam - This software package is covered by the GNU General Public License. - - - 1. INTRODUCTION *************** diff --git a/doc/faq b/doc/faq index 40fcc872..85e6d9ee 100644 --- a/doc/faq +++ b/doc/faq @@ -1,7 +1,7 @@ Frequently Asked Questions - about ADDA + about Amsterdam DDA - $Date:: $ + last revised: 3 June 2007 Q: I have found a bug in ADDA. What should I do? A: 1) Make sure you are using the latest version of ADDA (check the ADDA @@ -14,7 +14,6 @@ A: 1) Make sure you are using the latest version of ADDA (check the ADDA parameters as possible without removing the bug. Also try to use defautl versions of input files, that you have modified. 5) Send the results of (3) and (4) to the authors, together with all input - files and Makefiles that you have used for compilation. Do not forget to include all the relevant output files, at least 'log'. Please also include a brief description of your operation system and hardware. We will try to @@ -71,14 +70,5 @@ A: The simplest is to specify your particle by a shape file. However, if your source files to the authors so they would be incorporated in the next release for the benefit of the community. -Q: How is the Mueller matrix, produced by ADDA, defined and/or normalized? -A: It is defined as in Bohren & Huffman "Absorption and scattering of Light by - Small Particles" (1983), and it is not normalized. Some other codes may - compute Stokes scattering matrix, which is normalized so that 1,1-element - is equal to 1 after averaging over the whole solid angle. This matrix - should be multiplied by (pi*Csca/(lambda^2)) to get Mueller matrix. Csca is - the scattering cross section for unpolarized light, equal to average of - scattering cross sections for any two perpendicular incident polarizations. - This list is far from being complete. Please send your questions to adda@science.uva.nl diff --git a/src/ADDAmain.c b/src/ADDAmain.c index d20c5715..46f48442 100644 --- a/src/ADDAmain.c +++ b/src/ADDAmain.c @@ -3,7 +3,7 @@ * DESCR: Main. All the work moved to other modules. * * Previous versions were developed by Alfons Hoekstra. - * Sequential version, Michel Grimminck January 1995 + * Sequential version, Michel Grimminck Jan 1995 * * Copyright (C) 2006-2008 University of Amsterdam * This code is covered by the GNU General Public License. @@ -15,58 +15,59 @@ #include "debug.h" #include "io.h" -// EXTERNAL FUNCTIONS +/* EXTERNAL FUNCTIONS */ -// calculator.c +/* calculator.c */ void Calculator(void); -// make_particle.c +/* make_particle.c */ void InitShape(void); int MakeParticle(void); -// param.c +/* param.c */ void InitVariables(void); void ParseParameters(int argc,char **argv); void VariablesInterconnect(void); void DirectoryLog(int argc,char **argv); void PrintInfo(void); -//============================================================ +/*============================================================*/ int main(int argc,char **argv) { - // Initialize error handling and line wrapping - logfile=NULL; - term_width=DEF_TERM_WIDTH; - // Start global time - StartTime(); - // Initialize communications - InitComm(&argc,&argv); - // Initialize and parse input parameters - InitVariables(); - ParseParameters(argc,argv); - D("Reading command line finished"); - VariablesInterconnect(); // also initializes beam - // Initialize symmetries and box's; get number of dipoles; set some variables - InitShape(); - // !!! before this line errors should be printed in simple format, after - in advanced one - // Create directory and start logfile (print command line) - DirectoryLog(argc,argv); - // Initialize FFT grid and its subdivision over processors - ParSetup(); - // MakeParticle; initialize dpl and nlocalRows - MakeParticle(); - // Print info to stdout and logfile - PrintInfo(); - // Main calculation part - D("Calculator started"); - Calculator(); - D("Calculator finished"); - // Print timing and statistics; close logfile - FinalStatistics(); - // check error on stdout - if (ferror(stdout)) LogError(EC_WARN,ALL_POS, - "Some errors occurred while writing to stdout during the execution of ADDA"); - // finish execution normally - Stop(0); - // never actually reached; just to make the compiler happy - return 0; + /* initialize error handling and line wrapping */ + logfile=NULL; + term_width=DEF_TERM_WIDTH; + /* start global time */ + StartTime(); + /* initialize communications */ + InitComm(&argc,&argv); + /* initialize and parse input parameters */ + InitVariables(); + ParseParameters(argc,argv); + D("Reading command line finished"); + VariablesInterconnect(); /* also initializes beam */ + /* initialize symmetries and box's; get number of dipoles; set some variables */ + InitShape(); + /* !!! before errorrs should be printed in simple format, after in more advanced one !!! */ + /* Create directory and start logfile (print command line) */ + DirectoryLog(argc,argv); + /* initialize FFT grid and its subdivision over processors */ + ParSetup(); + /* MakeParticle; initialize dpl and nlocalRows */ + MakeParticle(); + /* print info to stdout and logfile */ + PrintInfo(); + /* initialize times and counters */ + /* Main calculation part */ + D("Calculator started"); + Calculator(); + D("Calculator finished"); + /* print timing and statistics; close logfile */ + FinalStatistics(); + /* check error on stdout */ + if (ferror(stdout)) LogError(EC_WARN,ALL_POS, + "Some errors occured while writing to stdout during the execution of ADDA"); + /* finish execution normally */ + Stop(0); + /* never actually reached; just to make the compiler happy */ + return 0; } diff --git a/src/CalculateE.c b/src/CalculateE.c index 9e1e4220..8569799f 100644 --- a/src/CalculateE.c +++ b/src/CalculateE.c @@ -9,7 +9,7 @@ * * Previous versions by Alfons Hoekstra * - * Copyright (C) 2006-2008 University of Amsterdam + * Copyright (C) 2006-2007 University of Amsterdam * This code is covered by the GNU General Public License. */ #include @@ -27,675 +27,722 @@ #include "timing.h" #include "function.h" -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and initialized in calculator.c -extern double *muel_phi,*muel_phi_buf; +/* defined and initialized in calculator.c */ +extern double *muel_phi,*muel_phi1; extern doublecomplex *EplaneX, *EplaneY; extern double *Eplane_buffer; extern const double dtheta_deg,dtheta_rad; extern doublecomplex *ampl_alphaX,*ampl_alphaY; extern double *muel_alpha; -// defined and initialized in crosssec.c +/* defined and initialized in crosssec.c */ extern const Parms_1D phi_sg; -// defined and initialized in param.c +/* defined and initialized in param.c */ extern const int store_int_field,store_dip_pol,store_beam,store_scat_grid,calc_Cext,calc_Cabs, -calc_Csca,calc_vec,calc_asym,calc_mat_force,store_force,phi_int_type; -// defined and initialized in timing.c + calc_Csca,calc_vec,calc_asym,calc_mat_force,store_force,phi_int_type; +/* defined and initialized in timing.c */ extern TIME_TYPE Timing_EFieldPlane,Timing_comm_EField, -Timing_IntField,Timing_IntFieldOne,Timing_ScatQuan; + Timing_IntField,Timing_IntFieldOne,Timing_ScatQuan; extern unsigned long TotalEFieldPlane; -// used in iterative.c +/* used in iterative.c */ TIME_TYPE tstart_CE; -// EXTERNAL FUNCTIONS +/* EXTERNAL FUNCTIONS */ -// GenerateB.c +/* GenerateB.c */ void GenerateB(char which,doublecomplex *x); -// iterative.c +/* iterative.c */ int IterativeSolver(int method); -//============================================================ +/*============================================================*/ -static void ComputeMuellerMatrix(double matrix[4][4], const doublecomplex s1,const doublecomplex s2, - const doublecomplex s3,const doublecomplex s4) -/* computer mueller matrix from scattering matrix elements s1, s2, s3, s4, according to formula - * 3.16 from Bohren and Huffman - */ +static void ComputeMuellerMatrix(double matrix[4][4], const doublecomplex s1, + const doublecomplex s2,const doublecomplex s3,const doublecomplex s4) +/* computer mueller matrix from scattering matrix elements s1, s2, s3, s4, accoording + to formula 3.16 from Bohren and Huffman */ { - matrix[0][0] = 0.5*(cMultConRe(s1,s1)+cMultConRe(s2,s2)+cMultConRe(s3,s3)+cMultConRe(s4,s4)); - matrix[0][1] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s4,s4)-cMultConRe(s3,s3)); - matrix[0][2] = cMultConRe(s2,s3)+cMultConRe(s1,s4); - matrix[0][3] = cMultConIm(s2,s3)-cMultConIm(s1,s4); - - matrix[1][0] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s3,s3)-cMultConRe(s4,s4)); - matrix[1][1] = 0.5*(cMultConRe(s2,s2)+cMultConRe(s1,s1)-cMultConRe(s3,s3)-cMultConRe(s4,s4)); - matrix[1][2] = cMultConRe(s2,s3)-cMultConRe(s1,s4); - matrix[1][3] = cMultConIm(s2,s3)+cMultConIm(s1,s4); - - matrix[2][0] = cMultConRe(s2,s4)+cMultConRe(s1,s3); - matrix[2][1] = cMultConRe(s2,s4)-cMultConRe(s1,s3); - matrix[2][2] = cMultConRe(s1,s2)+cMultConRe(s3,s4); - matrix[2][3] = cMultConIm(s2,s1)+cMultConIm(s4,s3); - - matrix[3][0] = cMultConIm(s4,s2)+cMultConIm(s1,s3); - matrix[3][1] = cMultConIm(s4,s2)-cMultConIm(s1,s3); - matrix[3][2] = cMultConIm(s1,s2)-cMultConIm(s3,s4); - matrix[3][3] = cMultConRe(s1,s2)-cMultConRe(s3,s4); + matrix[0][0] = 0.5*(cMultConRe(s1,s1)+cMultConRe(s2,s2)+ + cMultConRe(s3,s3)+cMultConRe(s4,s4)); + matrix[0][1] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+ + cMultConRe(s4,s4)-cMultConRe(s3,s3)); + matrix[0][2] = cMultConRe(s2,s3)+cMultConRe(s1,s4); + matrix[0][3] = cMultConIm(s2,s3)-cMultConIm(s1,s4); + + matrix[1][0] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+ + cMultConRe(s3,s3)-cMultConRe(s4,s4)); + matrix[1][1] = 0.5*(cMultConRe(s2,s2)+cMultConRe(s1,s1)+ + -cMultConRe(s3,s3)-cMultConRe(s4,s4)); + matrix[1][2] = cMultConRe(s2,s3)-cMultConRe(s1,s4); + matrix[1][3] = cMultConIm(s2,s3)+cMultConIm(s1,s4); + + matrix[2][0] = cMultConRe(s2,s4)+cMultConRe(s1,s3); + matrix[2][1] = cMultConRe(s2,s4)-cMultConRe(s1,s3); + matrix[2][2] = cMultConRe(s1,s2)+cMultConRe(s3,s4); + matrix[2][3] = cMultConIm(s2,s1)+cMultConIm(s4,s3); + + matrix[3][0] = cMultConIm(s4,s2)+cMultConIm(s1,s3); + matrix[3][1] = cMultConIm(s4,s2)-cMultConIm(s1,s3); + matrix[3][2] = cMultConIm(s1,s2)-cMultConIm(s3,s4); + matrix[3][3] = cMultConRe(s1,s2)-cMultConRe(s3,s4); } -//============================================================ -// this function is currently not used -static void ComputeMuellerMatrixNorm(double [4][4],const doublecomplex,const doublecomplex, - const doublecomplex,const doublecomplex) ATT_UNUSED; +/*============================================================*/ + /* this function is currently not used */ +static void ComputeMuellerMatrixNorm(double [4][4],const doublecomplex, + const doublecomplex,const doublecomplex,const doublecomplex) ATT_UNUSED; static void ComputeMuellerMatrixNorm(double matrix[4][4],const doublecomplex s1, - const doublecomplex s2,const doublecomplex s3,const doublecomplex s4) -/* computer mueller matrix from scattering matrix elements s1, s2, s3, s4, according to formula - * 3.16 from Bohren and Huffman; normalize all elements to S11 (except itself) - */ -{ - matrix[0][0] = 0.5*(cMultConRe(s1,s1)+cMultConRe(s2,s2)+cMultConRe(s3,s3)+cMultConRe(s4,s4)); - matrix[0][1] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s4,s4)-cMultConRe(s3,s3)) - / matrix[0][0]; - matrix[0][2] = (cMultConRe(s2,s3)+cMultConRe(s1,s4))/matrix[0][0]; - matrix[0][3] = (cMultConIm(s2,s3)-cMultConIm(s1,s4))/matrix[0][0]; - - matrix[1][0] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s3,s3)-cMultConRe(s4,s4)) - / matrix[0][0]; - matrix[1][1] = 0.5*(cMultConRe(s2,s2)+cMultConRe(s1,s1)-cMultConRe(s3,s3)-cMultConRe(s4,s4)) - / matrix[0][0]; - matrix[1][2] = (cMultConRe(s2,s3)-cMultConRe(s1,s4))/matrix[0][0]; - matrix[1][3] = (cMultConIm(s2,s3)+cMultConIm(s1,s4))/matrix[0][0]; - - matrix[2][0] = (cMultConRe(s2,s4)+cMultConRe(s1,s3))/matrix[0][0]; - matrix[2][1] = (cMultConRe(s2,s4)-cMultConRe(s1,s3))/matrix[0][0]; - matrix[2][2] = (cMultConRe(s1,s2)+cMultConRe(s3,s4))/matrix[0][0]; - matrix[2][3] = (cMultConIm(s2,s1)+cMultConIm(s4,s3))/matrix[0][0]; - - matrix[3][0] = (cMultConIm(s4,s2)+cMultConIm(s1,s3))/matrix[0][0]; - matrix[3][1] = (cMultConIm(s4,s2)-cMultConIm(s1,s3))/matrix[0][0]; - matrix[3][2] = (cMultConIm(s1,s2)-cMultConIm(s3,s4))/matrix[0][0]; - matrix[3][3] = (cMultConRe(s1,s2)-cMultConRe(s3,s4))/matrix[0][0]; -} - -//============================================================== -INLINE void InitMuellerIntegrFile(const int type,const char *fname,FILE **file,char *buf, - double **mult) -/* If 'phi_int_type' matches 'type', appropriate file (name given by 'fname') is created (with - * handle '*file'), and heading line is put into it. String buffer 'buf' is used. Vector of - * multipliers '*mult' is allocated if its pointer is specified. - */ -{ - if (phi_int_type & type) { - sprintf(buf,"%s/%s",directory,fname); - (*file)=FOpenErr(buf,"w",ONE_POS); - fprintf(*file,"theta s11 s12 s13 s14 s21 s22 s23 s24 s31 s32 s33 s34 s41 s42 s43 s44 " - "RMSE(integr)\n"); - if (mult!=NULL) MALLOC_VECTOR(*mult,double,angles.phi.N,ALL); - } -} - -//============================================================== - -INLINE void PrintToIntegrFile(const int type,FILE *file,double *maxerr,const double *muel, - double *muel_buf,const double *mult,double matrix[4][4],const double theta) -/* If 'phi_int_type' matches 'type', array 'muel' is integrated over phi (possibly using multiplier - * 'mult' and buffer 'muel_buf') and saved to 'file' together with 'theta'. Maximum error '*maxerr' - * is updated, 'matrix' buffer is used. - */ + const doublecomplex s2,const doublecomplex s3,const doublecomplex s4) +/* computer mueller matrix from scattering matrix elements s1, s2, s3, s4, accoording to + formula 3.16 from Bohren and Huffman; normalize all elements to S11 (except itself)*/ { - int k; - size_t j; - double err; - - if (phi_int_type & type) { - if (mult==NULL) err=Romberg1D(phi_sg,16,muel,matrix[0]); - else { - for (j=0;j*maxerr) *maxerr=err; - fprintf(file,"%.2f %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E" - " %.10E %.10E %.10E %.10E %.3E\n",theta,matrix[0][0],matrix[0][1],matrix[0][2], - matrix[0][3],matrix[1][0],matrix[1][1],matrix[1][2],matrix[1][3],matrix[2][0], - matrix[2][1],matrix[2][2],matrix[2][3],matrix[3][0],matrix[3][1],matrix[3][2], - matrix[3][3],err); - } + matrix[0][0] = 0.5*(cMultConRe(s1,s1)+cMultConRe(s2,s2)+ + cMultConRe(s3,s3)+cMultConRe(s4,s4)); + matrix[0][1] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+ + cMultConRe(s4,s4)-cMultConRe(s3,s3))/matrix[0][0]; + matrix[0][2] = (cMultConRe(s2,s3)+cMultConRe(s1,s4))/matrix[0][0]; + matrix[0][3] = (cMultConIm(s2,s3)-cMultConIm(s1,s4))/matrix[0][0]; + + matrix[1][0] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+ + cMultConRe(s3,s3)-cMultConRe(s4,s4))/matrix[0][0]; + matrix[1][1] = 0.5*(cMultConRe(s2,s2)+cMultConRe(s1,s1)+ + -cMultConRe(s3,s3)-cMultConRe(s4,s4))/matrix[0][0]; + matrix[1][2] = (cMultConRe(s2,s3)-cMultConRe(s1,s4))/matrix[0][0]; + matrix[1][3] = (cMultConIm(s2,s3)+cMultConIm(s1,s4))/matrix[0][0]; + + matrix[2][0] = (cMultConRe(s2,s4)+cMultConRe(s1,s3))/matrix[0][0]; + matrix[2][1] = (cMultConRe(s2,s4)-cMultConRe(s1,s3))/matrix[0][0]; + matrix[2][2] = (cMultConRe(s1,s2)+cMultConRe(s3,s4))/matrix[0][0]; + matrix[2][3] = (cMultConIm(s2,s1)+cMultConIm(s4,s3))/matrix[0][0]; + + matrix[3][0] = (cMultConIm(s4,s2)+cMultConIm(s1,s3))/matrix[0][0]; + matrix[3][1] = (cMultConIm(s4,s2)-cMultConIm(s1,s3))/matrix[0][0]; + matrix[3][2] = (cMultConIm(s1,s2)-cMultConIm(s3,s4))/matrix[0][0]; + matrix[3][3] = (cMultConRe(s1,s2)-cMultConRe(s3,s4))/matrix[0][0]; } -//============================================================== - -INLINE void CloseIntegrFile(const int type,FILE *file,const char *fname,double *mult) -/* If 'phi_int_type' matches 'type', appropriate 'file' (named 'fname') is closed and array 'mult' - * is freed. - */ -{ - if (phi_int_type & type) { - FCloseErr(file,fname,ONE_POS); - Free_general(mult); - } -} -//============================================================== +/*==============================================================*/ void MuellerMatrix(void) { - FILE *mueller,*mueller_int,*mueller_int_c2,*mueller_int_s2,*mueller_int_c4,*mueller_int_s4; - double *cos2,*sin2,*cos4,*sin4; - double matrix[4][4]; - double theta,phi,ph, - max_err,max_err_c2,max_err_s2,max_err_c4,max_err_s4; - doublecomplex s1,s2,s3,s4,s10,s20,s30,s40; - char fname[MAX_FNAME]; - int i; - size_t index,index1,k_or,j,n,ind; - double co,si; - double alph; - TIME_TYPE tstart; - - if (ringid!=ROOT) return; - - if (orient_avg) { // Amplitude matrix stored in ampl_alplha is - index1=index=0; // transformed into Mueller matrix stored in muel_alpha - for (k_or=0;k_or %.3E\n",max_err); - if (phi_int_type & PHI_COS2) fprintf(logfile," cos(2*phi) -> %.3E\n",max_err_c2); - if (phi_int_type & PHI_SIN2) fprintf(logfile," cos(2*phi) -> %.3E\n",max_err_c2); - if (phi_int_type & PHI_COS4) fprintf(logfile," cos(2*phi) -> %.3E\n",max_err_c2); - if (phi_int_type & PHI_SIN4) fprintf(logfile," cos(2*phi) -> %.3E\n",max_err_c2); - } - // close files; free arrays - if (store_scat_grid) FCloseErr(mueller,F_MUEL_SG,ONE_POS); - if (phi_integr) { - CloseIntegrFile(PHI_UNITY,mueller_int,F_MUEL_INT,NULL); - CloseIntegrFile(PHI_COS2,mueller_int_c2,F_MUEL_C2,cos2); - CloseIntegrFile(PHI_SIN2,mueller_int_s2,F_MUEL_S2,sin2); - CloseIntegrFile(PHI_COS4,mueller_int_c4,F_MUEL_C4,cos4); - CloseIntegrFile(PHI_SIN4,mueller_int_s4,F_MUEL_S4,sin4); - } - } - Timing_FileIO += GET_TIME() - tstart; - } + FILE *mueller,*mueller_int,*mueller_int_c2,*mueller_int_s2,*mueller_int_c4,*mueller_int_s4; + double *cos2,*sin2,*cos4,*sin4; + double matrix[4][4]; + double theta,phi,ph,err, + max_err,max_err_c2,max_err_s2,max_err_c4,max_err_s4; + doublecomplex s1,s2,s3,s4,s10,s20,s30,s40; + char fname[MAX_FNAME]; + int i,k; + size_t index,index1,k_or,j,n,ind; + double co,si; + double alph; + TIME_TYPE tstart; + + if (ringid!=ROOT) return; + + if (orient_avg) { /* Amplitude matrix stored in ampl_alplha is */ + index1=index=0; /* transformed into Mueller matrix stored in muel_alpha */ + for (k_or=0;k_ormax_err) max_err=err; + fprintf(mueller_int, + "%.2f %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E"\ + " %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.3E\n", + theta,matrix[0][0],matrix[0][1],matrix[0][2],matrix[0][3], + matrix[1][0],matrix[1][1],matrix[1][2],matrix[1][3], + matrix[2][0],matrix[2][1],matrix[2][2],matrix[2][3], + matrix[3][0],matrix[3][1],matrix[3][2],matrix[3][3],err); + } + if (phi_int_type & PHI_COS2) { + for (j=0;jmax_err_c2) max_err_c2=err; + fprintf(mueller_int_c2, + "%.2f %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E"\ + " %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.3E\n", + theta,matrix[0][0],matrix[0][1],matrix[0][2],matrix[0][3], + matrix[1][0],matrix[1][1],matrix[1][2],matrix[1][3], + matrix[2][0],matrix[2][1],matrix[2][2],matrix[2][3], + matrix[3][0],matrix[3][1],matrix[3][2],matrix[3][3],err); + } + if (phi_int_type & PHI_SIN2) { + for (j=0;jmax_err_s2) max_err_s2=err; + fprintf(mueller_int_s2, + "%.2f %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E"\ + " %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.3E\n", + theta,matrix[0][0],matrix[0][1],matrix[0][2],matrix[0][3], + matrix[1][0],matrix[1][1],matrix[1][2],matrix[1][3], + matrix[2][0],matrix[2][1],matrix[2][2],matrix[2][3], + matrix[3][0],matrix[3][1],matrix[3][2],matrix[3][3],err); + } + if (phi_int_type & PHI_COS4) { + for (j=0;jmax_err_c4) max_err_c4=err; + fprintf(mueller_int_c4, + "%.2f %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E"\ + " %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.3E\n", + theta,matrix[0][0],matrix[0][1],matrix[0][2],matrix[0][3], + matrix[1][0],matrix[1][1],matrix[1][2],matrix[1][3], + matrix[2][0],matrix[2][1],matrix[2][2],matrix[2][3], + matrix[3][0],matrix[3][1],matrix[3][2],matrix[3][3],err); + } + if (phi_int_type & PHI_SIN4) { + for (j=0;jmax_err_s4) max_err_s4=err; + fprintf(mueller_int_s4, + "%.2f %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.10E"\ + " %.10E %.10E %.10E %.10E %.10E %.10E %.10E %.3E\n", + theta,matrix[0][0],matrix[0][1],matrix[0][2],matrix[0][3], + matrix[1][0],matrix[1][1],matrix[1][2],matrix[1][3], + matrix[2][0],matrix[2][1],matrix[2][2],matrix[2][3], + matrix[3][0],matrix[3][1],matrix[3][2],matrix[3][3],err); + } + } + } + if (phi_integr) { + fprintf(logfile,"\nMaximum relative mean-square error of Mueller integration:\n"); + if (phi_int_type & PHI_UNITY) fprintf(logfile," 1 -> %.3E\n",max_err); + if (phi_int_type & PHI_COS2) fprintf(logfile," cos(2*phi) -> %.3E\n",max_err_c2); + if (phi_int_type & PHI_SIN2) fprintf(logfile," cos(2*phi) -> %.3E\n",max_err_c2); + if (phi_int_type & PHI_COS4) fprintf(logfile," cos(2*phi) -> %.3E\n",max_err_c2); + if (phi_int_type & PHI_SIN4) fprintf(logfile," cos(2*phi) -> %.3E\n",max_err_c2); + } + /* close files; free arrays */ + if (store_scat_grid) FCloseErr(mueller,F_MUEL_SG,ONE_POS); + if (phi_integr) { + if (phi_int_type & PHI_UNITY) FCloseErr(mueller_int,F_MUEL_INT,ONE_POS); + if (phi_int_type & PHI_COS2) { + FCloseErr(mueller_int_c2,F_MUEL_C2,ONE_POS); + Free_general(cos2); + } + if (phi_int_type & PHI_SIN2) { + FCloseErr(mueller_int_s2,F_MUEL_S2,ONE_POS); + Free_general(sin2); + } + if (phi_int_type & PHI_COS4) { + FCloseErr(mueller_int_c4,F_MUEL_C4,ONE_POS); + Free_general(cos4); + } + if (phi_int_type & PHI_SIN4) { + FCloseErr(mueller_int_s4,F_MUEL_S4,ONE_POS); + Free_general(sin4); + } + } + } + Timing_FileIO += GET_TIME() - tstart; + } } -//============================================================ +/*============================================================*/ static void CalcEplane(const char which,const int type) -// calculates scattered electric field in a plane + /* calculates scattered electric field in a plane */ { - double *incPol,*incPolper,*incPolpar; - // where to store calculated field for one plane (actually points to different other arrays) - doublecomplex *Eplane; - int i; - doublecomplex ebuff[3]; // small vector to hold E fields - double robserver[3]; // small vector for observer in E calculation - double epar[3]; // unit vector in direction of Epar - double theta; // scattering angle - double co,si; // temporary, cos and sin of some angle - double incPol_tmp1[3],incPol_tmp2[3]; // just allocated memory for incPolper, incPolpar - double alph; - TIME_TYPE tstart; - size_t k_or; - int orient,Norient; - char choice; - - incPolper=incPol_tmp1; // initialization of per and par polarizations - incPolpar=incPol_tmp2; - - if (type==CE_NORMAL) Norient=1; // initialize # orientations - else if (type==CE_PARPER) Norient=2; - - for (k_or=0;k_or X - memcpy(incPolpar,incPolY,3*sizeof(double)); // par <=> Y - } - - for(orient=0;orient -IncPolX; incPolX -> IncPolY - * */ - if (which=='X') choice='Y'; - else if (which=='Y') choice='X'; - incPol=incPolper; - incPolper=incPolpar; - incPolpar=incPol; - MultScal(-1,incPolpar,incPolpar); - } - // initialize Eplane - if (orient_avg) { - if (choice=='X') Eplane=ampl_alphaX + 2*nTheta*k_or; - else if (choice=='Y') Eplane=ampl_alphaY + 2*nTheta*k_or; - } - else { - if (choice=='X') Eplane=EplaneX; - else if (choice=='Y') Eplane=EplaneY; - } - - for (i=0;i X */ + memcpy(incPolpar,incPolY,3*sizeof(double)); /* par <=> Y */ + } + + for(orient=0;orient -IncPolX; incPolX -> IncPolY */ + if (which=='X') choice='Y'; + else if (which=='Y') choice='X'; + incPol=incPolper; + incPolper=incPolpar; + incPolpar=incPol; + MultScal(-1,incPolpar,incPolpar); + } + /* initialize Eplane */ + if (orient_avg) { + if (choice=='X') Eplane=ampl_alphaX + 2*nTheta*k_or; + else if (choice=='Y') Eplane=ampl_alphaY + 2*nTheta*k_or; + } + else { + if (choice=='X') Eplane=EplaneX; + else if (choice=='Y') Eplane=EplaneY; + } + + for (i=0;i @@ -28,197 +28,194 @@ #include "const.h" #include "comm.h" -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and initialized in param.c +/* defined and initialized in param.c */ extern const int beam_Npars; extern const double beam_pars[]; -// used in crosssec.c -double beam_center_0[3]; // position of the beam center in laboratory reference frame -// used in param.c -char beam_descr[MAX_PARAGRAPH]; // string for log file with beam parameters +/* used in crosssec.c */ +double beam_center_0[3]; /* position of the beam center in laboratory reference frame */ +/* used in param.c */ +char beam_descr[MAX_PARAGRAPH]; /* string for log file with beam parameters */ -// LOCAL VARIABLES -double s,s2; // beam confinement factor and its square -double scale_x,scale_z; // multipliers for scaling coordinates +/* LOCAL VARIABLES */ +double s,s2; /* beam confinement factor and its square */ +double scale_x,scale_z; /* multipliers for scaling coordinates */ -//============================================================ +/*============================================================*/ void InitBeam(void) -// initialize beam; produce description string + /* initialize beam; produce dscription string */ { - double w0; // beam width + double w0; /* beam width */ - if (beamtype==B_PLANE) { - STRCPYZ(beam_descr,"Plane wave"); - beam_asym=FALSE; - } - else { - // initialize parameters - w0=beam_pars[0]; - beam_asym=(beam_Npars==4 && (beam_pars[1]!=0 || beam_pars[2]!=0 || beam_pars[3]!=0)); - if (beam_asym) { - memcpy(beam_center_0,beam_pars+1,3*sizeof(double)); - // if necessary break the symmetry of the problem - if (beam_center_0[0]!=0) symX=symR=FALSE; - if (beam_center_0[1]!=0) symY=symR=FALSE; - if (beam_center_0[2]!=0) symZ=FALSE; - } - else beam_center[0]=beam_center[1]=beam_center[2]=0; - s=1/(WaveNum*w0); - s2=s*s; - scale_x=1/w0; - scale_z=s*scale_x; // 1/(k*w0^2) - // beam info - if (ringid==ROOT) { - strcpy(beam_descr,"Gaussian beam ("); - if (beamtype==B_LMINUS) strcat(beam_descr,"L- approximation)\n"); - else if (beamtype==B_DAVIS3) strcat(beam_descr,"3rd order approximation, by Davis)\n"); - else if (beamtype==B_BARTON5) strcat(beam_descr,"5th order approximation, by Barton)\n"); - sprintf(beam_descr+strlen(beam_descr),"\tWidth=%g (confinement factor s=%g)\n",w0,s); - if (beam_asym) - sprintf(beam_descr+strlen(beam_descr),"\tCenter position: (%g,%g,%g)", - beam_center_0[0],beam_center_0[1],beam_center_0[2]); - else strcat(beam_descr,"\tCenter is in the origin"); - } - } + if (beamtype==B_PLANE) { + STRCPYZ(beam_descr,"Plane wave"); + beam_asym=FALSE; + } + else { + /* initialize parameters */ + w0=beam_pars[0]; + beam_asym=(beam_Npars==4 && (beam_pars[1]!=0 || beam_pars[2]!=0 || beam_pars[3]!=0)); + if (beam_asym) { + memcpy(beam_center_0,beam_pars+1,3*sizeof(double)); + /* if needed break the symmetry of the problem */ + if (beam_center_0[0]!=0) symX=symR=FALSE; + if (beam_center_0[1]!=0) symY=symR=FALSE; + if (beam_center_0[2]!=0) symZ=FALSE; + } + else beam_center[0]=beam_center[1]=beam_center[2]=0; + s=1/(WaveNum*w0); + s2=s*s; + scale_x=1/w0; + scale_z=s*scale_x; /* 1/(k*w0^2) */ + /* beam info */ + if (ringid==ROOT) { + strcpy(beam_descr,"Gaussian beam ("); + if (beamtype==B_LMINUS) strcat(beam_descr,"L- approximation)\n"); + else if (beamtype==B_DAVIS3) strcat(beam_descr,"3rd order approximation, by Davis)\n"); + else if (beamtype==B_BARTON5) strcat(beam_descr,"5th order approximation, by Barton)\n"); + sprintf(beam_descr+strlen(beam_descr),"\tWidth=%g (confinement factor s=%g)\n",w0,s); + if (beam_asym) + sprintf(beam_descr+strlen(beam_descr),"\tCenter position: (%g,%g,%g)", + beam_center_0[0],beam_center_0[1],beam_center_0[2]); + else strcat(beam_descr,"\tCenter is in the origin"); + } + } } -//============================================================ +/*============================================================*/ -void GenerateB (const char which, // x - or y polarized incident light - doublecomplex *b) // the b vector for the incident field -// generates incident beam at every dipole +void GenerateB (const char which, /* x - or y polarized incident light */ + doublecomplex *b) /* the b vector for the incident field */ + /* generates incident beam at every dipole */ { - size_t i,j; - doublecomplex psi0,Q,Q2; - doublecomplex v1[3],v2[3],v3[3]; - double ro2,ro4; - double x,y,z,x2_s,xy_s; - doublecomplex t1,t2,t3,t4,t5,t6,t7,t8,t0,ctemp; - double const *ex; // coordinate axis of the beam reference frame - double ey[3]; - double r1[3]; + size_t i,j; + doublecomplex psi0,Q,Q2; + doublecomplex v1[3],v2[3],v3[3]; + double ro2,ro4; + double x,y,z,x2_s,xy_s; + doublecomplex t1,t2,t3,t4,t5,t6,t7,t8,t0,ctemp; + double const *ex; /* coordinate axis of the beam reference frame */ + double ey[3]; + double r1[3]; - // set reference frame of the beam; ez=prop, ex - incident polarization - if (which=='Y') { - ex=incPolY; - memcpy(ey,incPolX,3*sizeof(double)); - MultScal(-1,ey,ey); - } - if (which=='X') { - ex=incPolX; - memcpy(ey,incPolY,3*sizeof(double)); - } - // plane is separate to be fast - if (beamtype==B_PLANE) - for (i=0;inul -qipa=level=2 -qhot - DEPFLAG = -qmakedep=gcc -qsyntaxonly - CWARN = -qsuppress=1506-224:1506-342:1500-036 + ifndef RELEASE + CWARN = -w0 -msg_disable nestedcomment,unknownpragma,unreachcode + endif endif ifeq ($(COMPILER),other) endif -# if 'release' turn off warningns -ifdef RELEASE - CWARN = -w - LWARN = -w -endif # Finalize option flags (almost) CFLAGS += $(COPT) $(CWARN) FFLAGS += $(FOPT) $(FWARN) diff --git a/src/Romberg.c b/src/Romberg.c index 995aa847..fb950d4b 100644 --- a/src/Romberg.c +++ b/src/Romberg.c @@ -18,7 +18,7 @@ * Two instances of Romberg 2D should not be used in parallel (they use common storage). * E.g. calculation of Csca inside orientation averaging must not be done. * - * Integration parameters are described in a special structure Parms_1D defined in types.h + * Integration parameters are desribed in a special structure Parms_1D defined in types.h * They must be set outside of the Romberg routine. * * All the routines normalize the result on the interval width, i.e. @@ -39,495 +39,493 @@ #include "memory.h" #include "io.h" -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and initialized in crosssec.c +/* defined and initialized in crossec.c */ extern const int full_al_range; -// LOCAL VARIABLES - -static int dim; // dimension of the data (integrated simultaneously) -static int N_eval; // number of function evaluations in inner cycle -static int N_tot_eval; // total number of function evaluation -static int no_convergence; // number of inner integrals that did not converge -static FILE *file; // file to print info -// used in inner loop -static int size_in; // size of M array -static double **M_in, // array of M values - *T_in, // T_m^0 - *dummy_in; // save function values -// used in outer loop +/* LOCAL VARIABLES */ + +static int dim; /* dimension of the data (integrated simultaneously) */ +static int N_eval; /* number of function evaluations in inner cycle */ +static int N_tot_eval; /* total number of function evaluation */ +static int no_convergence; /* number of inner integrals that did not converge */ +static FILE *file; /* file to print info */ +/* used in inner loop */ +static int size_in; /* size of M array */ +static double **M_in, /* array of M values */ + *T_in, /* T_m^0 */ + *dummy_in; /* save function values */ +/* used in outer loop */ static int size_out; -static double **M_out,*T_out,*dummy_out; // analogous to the above -// common arrays with frequently used values -static double *tv1, // 4^m - *tv2, // 1/(4^m-1) - *tv3; // 2*4^m-1 -// pointer to the function that is integrated +static double **M_out,*T_out,*dummy_out; /* analogous to the above */ +/* common arrays with frequently used values */ +static double *tv1, /* 4^m */ + *tv2, /* 1/(4^m-1) */ + *tv3; /* 2*4^m-1 */ +/* pointer to the function that is integrated */ static double (*func)(int theta,int phi,double *res); -static const Parms_1D *input; // parameters of integration - -//============================================================ - -double Romberg1D(const Parms_1D param, // parameters of integration - const int size, // size of block of data - const double *data, // written as sequential blocks - double *res) // where to put result -/* Performs integration of data. Since all values are already calculated, no adaptation is used - * (all data is used). Result is normalized on the interval width, i.e. actually averaging takes - * place. Returns relative mean-square error; if function is periodic then only the first column of - * the table is used - i.e. trapezoid rule. This function is completely independent of others. - */ +static const Parms_1D *input; /* parameters of integration */ + +/*============================================================*/ + +double Romberg1D(/* Performs integration of data */ + const Parms_1D param,/* parameters of integration */ + const int size, /* size of block of data */ + const double *data, /* written as sequential blocks */ + double *res) /* where to put result */ + + /* Since all values are already calculated, no adaptation is used (all data is used) + Result is normalized on the interval width, i.e. actually averaging takes place. + returns relative mean-square error; if function is periodic then only the first column + of the table is used - i.e. trapezoid rule; + This function is completely independent */ { - int m,m0,comp,i,step,index,Msize; - size_t j; - double abs_res,abs_err; // norms (squared) of result and error - double temp; - double **M1,*T1,*t1,*t2,*t3; // analogous to those used in 2D Romberg - - // allocate memory - Msize = param.periodic ? 0 : param.Jmax; - MALLOC_DMATRIX(M1,Msize+1,size,ONE); - MALLOC_VECTOR(T1,double,size,ONE); - // common to fasten calculations; needed only for really Romberg - if (Msize!=0) { - MALLOC_VECTOR(t1,double,Msize+1,ONE); - MALLOC_DVECTOR2(t2,1,Msize,ONE); - MALLOC_VECTOR(t3,double,Msize+1,ONE); - t1[0]=1; - for (i=1;i>m; - for (comp=0;comp>1;j=0;i--) for (comp=0;comp>m; + for (comp=0;comp>1;j=0;i--) for (comp=0;comp M[k] -new-> M_(m-k)^k - */ +static void RombergIterate(double **M, /* array of M values */ + const int m) /* maximum order */ + /* performs one Romberg iteration; transforms previous array of M into a new one + M_m^k=((4^m)*M_(m-1)^(k+1)-M_(m-1)^k)/(4^m-1); + our storage implies M_(m-1-k)^k -old-> M[k] -new-> M_(m-k)^k */ { - int k,comp; + int k,comp; - for (k=m-1;k>=0;k--) for (comp=0;comp=0;k--) + for (comp=0;comp>n; - // init sum - for (comp=0;comp>1;j>n; + /* init sum */ + for (comp=0;comp>1;j=input[PHI].Jmin-1) { - abs_res=0.5*fabs(M_in[0][0]+T_in[0]); - abs_err=0.5*fabs(M_in[0][0]-T_in[0])+int_err; - if (abs_res==0) err=0; - else err=abs_err/abs_res; - if (err=input[PHI].eps) { - fprintf(file,"Inner_qromb converged only to d=%g for cosine value #%d\n",err,fixed); - fflush(file); - no_convergence++; - } - return (abs_err); + int m,m0,comp; + double abs_res,abs_err; /* norms of result and error */ + double int_err; /* absolute error of previous layer integration */ + double err; + + if (input[PHI].Grid_size==1 || onepoint) { /* if only one point (really or assumed) */ + int_err=(*func)(fixed,0,res); + N_eval++; + return int_err; + } + m0=0; /* equals 0 for periodic, m otherwise */ + for (m=0;m=input[PHI].Jmin-1) { + abs_res=0.5*fabs(M_in[0][0]+T_in[0]); + abs_err=0.5*fabs(M_in[0][0]-T_in[0])+int_err; + if (abs_res==0) err=0; + else err=abs_err/abs_res; + if (err=input[PHI].eps) { + fprintf(file,"Inner_qromb converged only to d=%g for cosine value #%d\n",err,fixed); + fflush(file); + no_convergence++; + } + return (abs_err); } -//============================================================ +/*============================================================*/ static double OuterInitT(double *res) -/* Calculate term T_0^0 for the outer integration of func; - * returns absolute error of the inner integration - */ + /* Calculate term T_0^0 for the outer integration of func; + returns absolute error of the inner integration */ { - int comp; - double err; - - // calculate first point - err=InnerRomberg(0,res,input[THETA].min==-1 && full_al_range); - - if (!input[THETA].equival) { - // calculate last point - err=0.5*(err - +InnerRomberg(input[THETA].Grid_size-1,dummy_out,input[THETA].max==1 && full_al_range)); - for (comp=0;comp>n; - // init sum - for (comp=0;comp>1;j>n; + /* init sum */ + for (comp=0;comp>1;j=input[THETA].Jmin-1) { - abs_res=0.5*fabs(M_out[0][0]+T_out[0]); - // absolute error is sum of the errors for current integration and accumulated inner error - abs_err=0.5*fabs(M_out[0][0]-T_out[0])+int_err; - if (abs_res==0) err=0; - else err=abs_err/abs_res; - if (err=input[THETA].Jmin-1) { + abs_res=0.5*fabs(M_out[0][0]+T_out[0]); + /* absolute error is sum of the errors for current integration and accumulated inner error */ + abs_err=0.5*fabs(M_out[0][0]-T_out[0])+int_err; + if (abs_res==0) err=0; + else err=abs_err/abs_res; + if (err // for memcpy -#include // for cos, sin -#include "const.h" // for math constants -#include "types.h" // for doublecomplex -#include "function.h" // for INLINE +#include /* for memcpy */ +#include /* for cos, sin */ +#include "const.h" /* for math constants */ +#include "types.h" /* for doublecomplex */ +#include "function.h" /* for INLINE */ -//============================================================ -// operations on complex numbers +/*============================================================*/ +/* operations on complex numbers */ INLINE void cEqual(const doublecomplex a,doublecomplex b) -// performs b=a + /* performs b=a */ { - memcpy(b,a,sizeof(doublecomplex)); + memcpy(b,a,sizeof(doublecomplex)); } -//============================================================ +/*============================================================*/ INLINE double cAbs2(const doublecomplex a) -// square of absolute value of complex number; |a|^2 + /* square of absolute value of complex number; |a|^2 */ { - return (a[RE]*a[RE] + a[IM]*a[IM]); + return (a[RE]*a[RE] + a[IM]*a[IM]); } -//============================================================ +/*============================================================*/ INLINE double cAbs(const doublecomplex a) -// absolute value of complex number |a|, specially designed to avoid overflow + /* absolute value of complex number |a|, specially designed to avoid overflow */ { - double u,v,w; - u=fabs(a[RE]); - v=fabs(a[IM]); + double u,v,w; + u=fabs(a[RE]); + v=fabs(a[IM]); - if (u==0 && v==0) return 0; - else { - if (u>=v) { - w=v/u; - return (u*sqrt(1+w*w)); - } - else { - w=u/v; - return (v*sqrt(1+w*w)); - } - } + if (u==0 && v==0) return 0; + else { + if (u>=v) { + w=v/u; + return (u*sqrt(1+w*w)); + } + else { + w=u/v; + return (v*sqrt(1+w*w)); + } + } } -//============================================================ +/*============================================================*/ INLINE void cConj(const doublecomplex a,doublecomplex b) -// complex conjugate; b=a* + /* complex conjugate; b=a* */ { - b[RE] = a[RE]; - b[IM] = - a[IM]; + b[RE] = a[RE]; + b[IM] = - a[IM]; } -//============================================================ +/*============================================================*/ INLINE void cAdd(const doublecomplex a,const doublecomplex b,doublecomplex c) -// add two complex numbers; c=a+b + /* add two complex numbers; c=a+b */ { - c[RE] = a[RE] + b[RE]; - c[IM] = a[IM] + b[IM]; + c[RE] = a[RE] + b[RE]; + c[IM] = a[IM] + b[IM]; } -//============================================================ +/*============================================================*/ INLINE void cSubtr(const doublecomplex a,const doublecomplex b,doublecomplex c) -// subtract two complex numbers; c=a-b + /* subtract two complex numbers; c=a-b */ { - c[RE] = a[RE] - b[RE]; - c[IM] = a[IM] - b[IM]; + c[RE] = a[RE] - b[RE]; + c[IM] = a[IM] - b[IM]; } -//============================================================ +/*============================================================*/ INLINE void cSquare(const doublecomplex a,doublecomplex b) -// square of complex number; b=a^2 + /* square of complex number; b=a^2 */ { - b[RE]=a[RE]*a[RE] - a[IM]*a[IM]; - b[IM]=2*a[IM]*a[RE]; + b[RE]=a[RE]*a[RE] - a[IM]*a[IM]; + b[IM]=2*a[IM]*a[RE]; } -//============================================================ +/*============================================================*/ INLINE void cMultReal(const double a,const doublecomplex b,doublecomplex c) -// complex multiplication by real; c=ab + /* complex multiplication by real; c=ab */ { - c[RE]=a*b[RE]; - c[IM]=a*b[IM]; + c[RE]=a*b[RE]; + c[IM]=a*b[IM]; } -//============================================================ +/*============================================================*/ INLINE void cMult_i(doublecomplex c) -// complex multiplication by i; c=i*c + /* complex multiplication by i; c=i*c */ { - double tmp; - tmp=c[RE]; - c[RE]=-c[IM]; - c[IM]=tmp; + double tmp; + tmp=c[RE]; + c[RE]=-c[IM]; + c[IM]=tmp; } -//============================================================ +/*============================================================*/ INLINE void cMult_i2(doublecomplex a,doublecomplex b) -// complex multiplication by i; b=i*a; !!! b and c should be different !!! + /* complex multiplication by i; b=i*a + !!! b and c should be different !!! */ { - b[RE]=-a[IM]; - b[IM]=a[RE]; + b[RE]=-a[IM]; + b[IM]=a[RE]; } -//============================================================ +/*============================================================*/ INLINE void cMult(const doublecomplex a,const doublecomplex b,doublecomplex c) -// complex multiplication; c=ab; !!! c should be different from a and b !!! + /* complex multiplication; c=ab */ + /* !!! c should be different from a and b !!! */ { - c[RE]=a[RE]*b[RE] - a[IM]*b[IM]; - c[IM]=a[IM]*b[RE] + a[RE]*b[IM]; + c[RE]=a[RE]*b[RE] - a[IM]*b[IM]; + c[IM]=a[IM]*b[RE] + a[RE]*b[IM]; } -//============================================================ +/*============================================================*/ INLINE void cMultSelf(doublecomplex a,const doublecomplex b) -// complex multiplication; a*=b + /* complex multiplication; a*=b */ { - double tmp; + double tmp; - tmp=a[RE]; - a[RE]=a[RE]*b[RE] - a[IM]*b[IM]; - a[IM]=a[IM]*b[RE] + tmp*b[IM]; + tmp=a[RE]; + a[RE]=a[RE]*b[RE] - a[IM]*b[IM]; + a[IM]=a[IM]*b[RE] + tmp*b[IM]; } -//============================================================ +/*============================================================*/ INLINE double cMultConRe(const doublecomplex a,const doublecomplex b) -// complex multiplication; returns real(a*b_conjugated) + /* complex multiplication; returns real(a*b_conjugated) */ { - return (a[RE]*b[RE] + a[IM]*b[IM]); + return (a[RE]*b[RE] + a[IM]*b[IM]); } -//============================================================ +/*============================================================*/ INLINE double cMultConIm(const doublecomplex a,const doublecomplex b) -// complex multiplication; returns imag(a*b_conjugated) + /* complex multiplication; returns imag(a*b_conjugated) */ { - return (a[IM]*b[RE] - a[RE]*b[IM]); + return (a[IM]*b[RE] - a[RE]*b[IM]); } -//============================================================ +/*============================================================*/ INLINE void cLinComb(const doublecomplex a,const doublecomplex b, const double c1,const double c2,doublecomplex c) -// linear combination of two complex numbers; c=c1*a+c2*b + /* linear combination of two complex numbers; c=c1*a+c2*b */ { - c[RE]=c1*a[RE]+c2*b[RE]; - c[IM]=c1*a[IM]+c2*b[IM]; + c[RE]=c1*a[RE]+c2*b[RE]; + c[IM]=c1*a[IM]+c2*b[IM]; } -//============================================================ +/*============================================================*/ INLINE void cInvSign(doublecomplex a) -// change sign of complex number; a*=-1; + /* change sign of complex number; a*=-1; */ { - a[RE] = - a[RE]; - a[IM] = - a[IM]; + a[RE] = - a[RE]; + a[IM] = - a[IM]; } -//============================================================ +/*============================================================*/ INLINE void cInvSign2(const doublecomplex a,doublecomplex b) -// change sign of complex number and store to different address; b=-a; + /* change sign of complex number and store to different address; b=-a; */ { - b[RE] = - a[RE]; - b[IM] = - a[IM]; + b[RE] = - a[RE]; + b[IM] = - a[IM]; } -//============================================================ +/*============================================================*/ INLINE void cInv(const doublecomplex a,doublecomplex b) -// complex inversion; b=1/a; designed to avoid under and overflows + /* complex inversion; b=1/a; designed to avoid under and overflows */ { - double tmp; + double tmp; - if (fabs(a[RE])>=fabs(a[IM])) { - tmp=a[IM]/a[RE]; - b[RE]=1/(a[RE]+a[IM]*tmp); - b[IM]=-b[RE]*tmp; - } - else { - tmp=a[RE]/a[IM]; - b[IM]=-1/(a[RE]*tmp+a[IM]); - b[RE]=-b[IM]*tmp; - } + if (fabs(a[RE])>=fabs(a[IM])) { + tmp=a[IM]/a[RE]; + b[RE]=1/(a[RE]+a[IM]*tmp); + b[IM]=-b[RE]*tmp; + } + else { + tmp=a[RE]/a[IM]; + b[IM]=-1/(a[RE]*tmp+a[IM]); + b[RE]=-b[IM]*tmp; + } } -//============================================================ +/*============================================================*/ INLINE double cInvIm(const doublecomplex a) -// returns Im of inverse of a; designed to avoid under and overflows + /* returns Im of inverse of a; designed to avoid under and overflows */ { - double tmp; + double tmp; - if (fabs(a[RE])>=fabs(a[IM])) { - tmp=a[IM]/a[RE]; - return (-tmp/(a[RE]+a[IM]*tmp)); - } - else { - tmp=a[RE]/a[IM]; - return (-1/(a[RE]*tmp+a[IM])); - } + if (fabs(a[RE])>=fabs(a[IM])) { + tmp=a[IM]/a[RE]; + return (-tmp/(a[RE]+a[IM]*tmp)); + } + else { + tmp=a[RE]/a[IM]; + return (-1/(a[RE]*tmp+a[IM])); + } } -//============================================================ +/*============================================================*/ INLINE void cDiv(const doublecomplex a,const doublecomplex b,doublecomplex c) -/* complex division; c=a/b; designed to avoid under and overflows - * !!! c should be different from a !!! - */ + /* complex division; c=a/b; designed to avoid under and overflows */ + /* !!! c should be different from a !!! */ { - double u,v; + double u,v; - if (fabs(b[RE])>=fabs(b[IM])) { - u=b[IM]/b[RE]; - v=1/(b[RE]+b[IM]*u); - c[RE]=(a[RE]+a[IM]*u)*v; - c[IM]=(a[IM]-a[RE]*u)*v; - } - else { - u=b[RE]/b[IM]; - v=1/(b[RE]*u+b[IM]); - c[RE]=(a[RE]*u+a[IM])*v; - c[IM]=(a[IM]*u-a[RE])*v; - } + if (fabs(b[RE])>=fabs(b[IM])) { + u=b[IM]/b[RE]; + v=1/(b[RE]+b[IM]*u); + c[RE]=(a[RE]+a[IM]*u)*v; + c[IM]=(a[IM]-a[RE]*u)*v; + } + else { + u=b[RE]/b[IM]; + v=1/(b[RE]*u+b[IM]); + c[RE]=(a[RE]*u+a[IM])*v; + c[IM]=(a[IM]*u-a[RE])*v; + } } -//============================================================ +/*============================================================*/ INLINE void cDivSelf(doublecomplex a,const doublecomplex b) -// complex division; a/=b; designed to avoid under and overflows + /* complex division; a/=b; designed to avoid under and overflows */ { - double u,v,w; + double u,v,w; - w=a[RE]; - if (fabs(b[RE])>=fabs(b[IM])) { - u=b[IM]/b[RE]; - v=1/(b[RE]+b[IM]*u); - a[RE]=(w+a[IM]*u)*v; - a[IM]=(a[IM]-w*u)*v; - } - else { - u=b[RE]/b[IM]; - v=1/(b[RE]*u+b[IM]); - a[RE]=(w*u+a[IM])*v; - a[IM]=(a[IM]*u-w)*v; - } + w=a[RE]; + if (fabs(b[RE])>=fabs(b[IM])) { + u=b[IM]/b[RE]; + v=1/(b[RE]+b[IM]*u); + a[RE]=(w+a[IM]*u)*v; + a[IM]=(a[IM]-w*u)*v; + } + else { + u=b[RE]/b[IM]; + v=1/(b[RE]*u+b[IM]); + a[RE]=(w*u+a[IM])*v; + a[IM]=(a[IM]*u-w)*v; + } } -//============================================================ +/*============================================================*/ INLINE void cSqrt(const doublecomplex a,doublecomplex b) -/* complex square root; b=sqrt(a); designed to avoid under and overflows; - * branch cut discontinuity is (-inf,0) - b[RE]>=0 - */ -{ - double u,v,w,r; - - u=fabs(a[RE]); - v=fabs(a[IM]); - if (u==0 && v==0) b[RE]=b[IM]=0; - else { - // first determine w - if (u>=v) { - r=v/u; - w=sqrt(u)*sqrt((1+sqrt(1+r*r))/2); - } - else { - r=u/v; - w=sqrt(v)*sqrt((r+sqrt(1+r*r))/2); - } - // compute the result - if (a[RE]>=0) { - b[RE]=w; - b[IM]=a[IM]/(2*w); - } - else { - b[RE]=v/(2*w); - if (a[IM]>=0) b[IM]=w; - else b[IM]=-w; - } - } -} - -//============================================================ + /* complex square root; b=sqrt(a); designed to avoid under and overflows + branch cut discontinuity is (-inf,0) - b[RE]>=0 */ +{ + double u,v,w,r; + + u=fabs(a[RE]); + v=fabs(a[IM]); + if (u==0 && v==0) b[RE]=b[IM]=0; + else { + /* first determine w */ + if (u>=v) { + r=v/u; + w=sqrt(u)*sqrt((1+sqrt(1+r*r))/2); + } + else { + r=u/v; + w=sqrt(v)*sqrt((r+sqrt(1+r*r))/2); + } + /* compute the result */ + if (a[RE]>=0) { + b[RE]=w; + b[IM]=a[IM]/(2*w); + } + else { + b[RE]=v/(2*w); + if (a[IM]>=0) b[IM]=w; + else b[IM]=-w; + } + } +} + +/*============================================================*/ INLINE void imExp(const double arg,doublecomplex c) -// exponent of imaginary argument c=Exp(i*arg); optimization is performed by compiler + /* exponent of imaginary argument c=Exp(i*arg) + Optimization is performed by compiler */ { - c[RE]=cos(arg); - c[IM]=sin(arg); + c[RE]=cos(arg); + c[IM]=sin(arg); } -//============================================================ +/*============================================================*/ INLINE void imExp_arr(const double arg,const int size,doublecomplex *c) -/* construct an array of exponent of imaginary argument c=Exp(i*k*arg) - * where k=0,1,...,size-1. Uses stable recurrence from Numerical Recipes. - * Optimization of the initial simultaneous calculation of sin and cos is performed - * by compiler; It is assumed that size is at least 1 - */ -{ - int k; - double a,b; - - c[0][RE]=1; - c[0][IM]=0; - if (size>1) { - // set a=2*sin^2(arg/2), b=sin(arg) - a=sin(arg/2); - b=cos(arg/2); - b*=2*a; - a*=2*a; - // this a bit faster than in the main cycle - c[1][RE]=1-a; - c[1][IM]=b; - // main cycle - for (k=2;k1) { + /* set a=2*sin^2(arg/2), b=sin(arg) */ + a=sin(arg/2); + b=cos(arg/2); + b*=2*a; + a*=2*a; + /* this a bit faster than in the main cycle */ + c[1][RE]=1-a; + c[1][IM]=b; + /* main cycle */ + for (k=2;k #endif #ifdef MPI -MPI_Datatype mpi_dcomplex; + MPI_Datatype mpi_dcomplex; #endif /* whether a synchronize call should be performed before parallel timing. It makes communication - * timing more accurate, but may deteriorate overall performance by introducing unnecessary - * delays (test showed only slight difference for granule generator) */ + timing more accurate, but may deteriorate overall performance by introducing unnecessary + delays (test showed only slight difference for granule generator) */ #define SYNCHRONIZE_TIMING -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and allocated in fft.c +/* defined and allocated in fft.c */ extern double *BT_buffer, *BT_rbuffer; -// defined and initialized in timing.c +/* defined and initialized in timing.c */ extern TIME_TYPE Timing_Dm_Init_comm; -// LOCAL VARIABLES +/* LOCAL VARIABLES */ #ifdef PARALLEL -static int Ntrans; // number of transmissions; used in CalcPartner -static int *gr_comm_size; // sizes of transmissions for granule generator communications -static int *gr_comm_overl; // shows whether two sequential transmissions overlap -static unsigned char *gr_comm_ob; // buffer for overlaps -static void *gr_comm_buf; // buffer for MPI transfers - -// First several functions are defined only in parallel mode -//=========================================== +static int Ntrans; /* number of transmissions; used in CalcPartner */ +static int *gr_comm_size; /* sizes of transmissions for granule generator communications */ +static int *gr_comm_overl; /* shows whether two sequential transmissions overlap */ +static unsigned char *gr_comm_ob; /* buffer for overlaps */ +static void *gr_comm_buf; /* buffer for MPI transfers */ +/* First funtions that are defined only in parallel mode */ +/*===========================================*/ static void RecoverCommandLine(int *argc_p,char ***argv_p) -/* eliminate all NULL pointers from argv, shift the rest, and adjust argc accordingly. - * Used in InitComm - * */ + /* eliminate all NULL pointers from argv, shift the rest, and + adjust argc accordingly. Used in InitComm */ { - int i,j; + int i,j; - for (i=0,j=0;i<(*argc_p);i++) { - if ((*argv_p)[i]==NULL) j++; - else if (j!=0) (*argv_p)[i-j]=(*argv_p)[i]; - } - (*argc_p)-=j; + for (i=0,j=0;i<(*argc_p);i++) { + if ((*argv_p)[i]==NULL) j++; + else if (j!=0) (*argv_p)[i-j]=(*argv_p)[i]; + } + (*argc_p)-=j; } -//============================================================ +/*============================================================*/ INLINE size_t IndexBlock(const size_t x,const size_t y,const size_t z,const size_t lengthY) -// index block; used in BlockTranspose + /* index block; used in BlockTranspose */ { - return((z*lengthY+y)*gridX+x); + return((z*lengthY+y)*gridX+x); } -//============================================================ +/*============================================================*/ INLINE int CalcPartner(const int tran) -/* calculate ringid of partner processor for current transmission; used in BlockTranspose. Many - * different implementations are possible; the only requirements are - * 1) f(tran,f(tran,ringid))=ringid - * 2) f({1,2,Ntrans},ringid)={0,1,Ntrans}\ringid - * where f=nprocs is equivalent to skipping this transmission (relevant for odd nprocs) - */ + /* calculate ringid of partner processor for current transmission; + used in BlockTranspose + many different implementations are possible; the only requirements are + 1) f(tran,f(tran,ringid))=ringid + 2) f({1,2,Ntrans},ringid)={0,1,Ntrans}\ringid + where f=nprocs <=> skip this transmission (for odd nprocs) */ { - int part; - - if (ringid==0) part=tran; - else if (ringid==tran) part=0; - else { - part=2*tran-ringid; - if (part<=0) part+=Ntrans; - else if (part>Ntrans) part-=Ntrans; - } - return part; + int part; + + if (ringid==0) part=tran; + else if (ringid==tran) part=0; + else { + part=2*tran-ringid; + if (part<=0) part+=Ntrans; + else if (part>Ntrans) part-=Ntrans; + } + return part; } -//============================================================ +/*===========================================*/ void CatNFiles(const char *dir,const char *tmpl,const char *dest) -/* cat several temporary files (one for each processor, names defined by the template 'tmpl' that - * should contain %d to be replaced by ringid). Files are located in directory 'dir'. Combined into - * 'dest' in the same directory. Afterwards temporary files are removed. - */ + /* cat several temporary files (one for each processor, names defines by the template 'temp' + that should contain %d to be replaced by ringid). Files are located in directory 'dir'. + Combined into 'dest' in the same directory. Afterwards temporary files are removed. */ { - int i,c; - FILE *in,*out; - char fname_out[MAX_TMP_FNAME],fname_in[MAX_TMP_FNAME]; - - // produce full path of destination file and open it - sprintf(fname_out,"%s/%s",directory,dest); - out=FOpenErr(fname_out,"w",ONE_POS); - for (i=0;i boxZ) local_z1_coer=boxZ; - else local_z1_coer=local_z1; - unitX=gridX/nprocs; - local_x0=ringid*unitX; - local_x1=(ringid+1)*unitX; + unitZ=smallZ/nprocs; /* this should always be an exact division */ + local_z0=ringid*unitZ; + local_z1=(ringid+1)*unitZ; + if (local_z1 > boxZ) local_z1_coer=boxZ; + else local_z1_coer=local_z1; + unitX=gridX/nprocs; + local_x0=ringid*unitX; + local_x1=(ringid+1)*unitX; #else - local_z0=0; - local_z1=smallZ; - local_z1_coer=boxZ; - local_x0=0; - local_x1=gridX; + local_z0=0; + local_z1=smallZ; + local_z1_coer=boxZ; + local_x0=0; + local_x1=gridX; #endif - if (local_z1_coer<=local_z0) { - LogError(EC_INFO,ALL_POS,"No real dipoles are assigned"); - local_z1_coer=local_z0; - } - local_Nz=local_z1-local_z0; - local_Nx=local_x1-local_x0; - local_Ndip=MultOverflow(boxX*(size_t)boxY,local_z1_coer-local_z0,ALL_POS,"local_Ndip"); - /* conversions to (unsigned long) are needed (to remove warnings) because %z printf argument is - * not yet supported by all target compiler environments - */ - printf("%i : %i %i %i %lu %lu \n", - ringid,local_z0,local_z1_coer,local_z1,(unsigned long)local_Ndip,(unsigned long)local_Nx); + if (local_z1_coer<=local_z0) { + LogError(EC_INFO,ALL_POS,"No real dipoles are assigned"); + local_z1_coer=local_z0; + } + local_Nz=local_z1-local_z0; + local_Nx=local_x1-local_x0; + local_Ndip=MultOverflow(boxX*(size_t)boxY,local_z1_coer-local_z0,ALL_POS,"local_Ndip"); + printf("%i : %i %i %i %u %u \n", + ringid,local_z0,local_z1_coer,local_z1,local_Ndip,local_Nx); } -//============================================================ +/*===========================================*/ void AllGather(void *x_from,void *x_to,const var_type type,size_t n_elem) -// Gather distributed arrays; works for all types + /* Gather distributed arrays; works for all types */ { #ifdef MPI - // TODO: need to be rewritten when n_elem are unequal on each processor - MPI_Datatype mes_type; - - if (type==char_type) mes_type = MPI_CHAR; - else if (type==int_type) mes_type = MPI_INT; - else if (type==double_type) mes_type = MPI_DOUBLE; - else if (type==cmplx_type) { - mes_type = MPI_DOUBLE; - n_elem *= 2; - } - else LogError(EC_ERROR,ONE_POS,"AllGather: variable type %u is not supported",type); - - MPI_Allgather(x_from,n_elem,mes_type,x_to,n_elem,mes_type,MPI_COMM_WORLD); + /* need to be rewritten when n_elem are unequal on each processor */ + MPI_Datatype mes_type; + + if (type==char_type) mes_type = MPI_CHAR; + else if (type==int_type) mes_type = MPI_INT; + else if (type==double_type) mes_type = MPI_DOUBLE; + else if (type==cmplx_type) { + mes_type = MPI_DOUBLE; + n_elem *= 2; + } + else LogError(EC_ERROR,ONE_POS,"AllGather: variable type %u is not supported",type); + + MPI_Allgather(x_from,n_elem,mes_type,x_to,n_elem,mes_type,MPI_COMM_WORLD); #endif } -//============================================================ +/*===========================================*/ #ifdef PARALLEL void CalcLocalGranulGrid(const double z0,const double z1,const double gdZ,const int gZ, - const int id,int *lz0,int *lz1) -/* calculates starting and ending (+1) cell of granule grid (lz0 & lz1) on a processor with - * ringid=id - */ + const int id,int *lz0,int *lz1) + /* calculates starting and ending (+1) cell of granule grid (lz0 & lz1) + on a processor with ringid=id */ { - int dzl,dzh; // similar to local_z0 and local_z1 - - dzl=local_Nz*id; - // should not be coerced because the result differs only for dzh>boxZ, then dzh-1>z0 - dzh=dzl+local_Nz; - if (dzl>z1) *lz0=*lz1=gZ; - else { - if (dzl>z0) *lz0=(int)floor((dzl-z0)/gdZ); - else *lz0=0; - if (dzh>z1) *lz1=gZ; - else if (dzh-1>z0) *lz1=(int)floor((dzh-z0-1)/gdZ)+1; - else *lz1=0; - } + int dzl,dzh; /* similar to local_z0 and local_z1 */ + + dzl=local_Nz*id; + dzh=dzl+local_Nz; /* should not be coerced because the result differs only for dzh>boxZ, + then dzh-1>z0 */ + if (dzl>z1) *lz0=*lz1=gZ; + else { + if (dzl>z0) *lz0=(int)floor((dzl-z0)/gdZ); + else *lz0=0; + if (dzh>z1) *lz1=gZ; + else if (dzh-1>z0) *lz1=(int)floor((dzh-z0-1)/gdZ)+1; + else *lz1=0; + } } #endif -//============================================================ +/*===========================================*/ void SetGranulComm(const double z0,const double z1,const double gdZ,const int gZ,const size_t gXY, - size_t max_gran,int *lz0,int *lz1,const int sm_gr) -/* sets communication for granule generator; max_gran - maximum number of granules in one set - * (used to allocate buffer); sm_gr - whether granules are small (simpler) - */ + size_t max_gran,int *lz0,int *lz1,const int sm_gr) + /* sets communication for granule generator + max_gran - maximum number of granules in one set (used to allocate buffer) + sm_gr - whether granules are small (simpler) */ { #ifdef PARALLEL - int i,loc0,loc1,loc1_prev=0; - - MALLOC_VECTOR(gr_comm_buf,void,max_gran*sizeof(char),ALL); - if (!sm_gr) { - if (ringid==ROOT) { - MALLOC_VECTOR(gr_comm_size,int,nprocs,ONE); - MALLOC_VECTOR(gr_comm_overl,int,nprocs-1,ONE); - // always allocated, not to mess with its freeing - MALLOC_VECTOR(gr_comm_ob,uchar,gXY,ONE); - /* The following is very inefficient (may be significantly optimized), but using one - * common function is more robust. - */ - for (i=0;i=0;i--) { - if (gr_comm_size[i]!=0) { - if (gr_comm_overl[i]) { - memcpy(gr_comm_ob,dom+index,unit); - index+=gXY; - } - MPI_Recv(dom+index-gXY*gr_comm_size[i],unit*gr_comm_size[i],MPI_UNSIGNED_CHAR,i,0, - MPI_COMM_WORLD,&status); - if (gr_comm_overl[i]) for (j=0;j=0;i--) { + if (gr_comm_size[i]!=0) { + if (gr_comm_overl[i]) { + memcpy(gr_comm_ob,dom+index,unit); + index+=gXY; + } + MPI_Recv(dom+index-gXY*gr_comm_size[i],unit*gr_comm_size[i],MPI_UNSIGNED_CHAR,i,0, + MPI_COMM_WORLD,&status); + if (gr_comm_overl[i]) for (j=0;j (B)) ? (B) : (A)) #define MAX(A,B) (((A) < (B)) ? (B) : (A)) -#define IS_EVEN(A) (((A)%2) == 0) -#define LENGTH(A) ((int)(sizeof(A)/sizeof(A[0]))) // length of any array (converted to int) +#define LENGTH(A) ((int)(sizeof(A)/sizeof(A[0]))) /* length of any array (converted to int) */ -// parallel definitions +/* parallel definitions */ #ifdef MPI #define PARALLEL #endif -/* ringid of root processor. Using ROOT!=0 should work, however it was not thoroughly tested. - * Hence do not change without necessity. - */ -#define ROOT 0 +#define ROOT 0 /* ringid of root processor */ + /* Using ROOT!=0 should work, however it was not thoroughly tested. + Hence do not change without necessity */ -// math constants rounded for 32 decimals +/* math constants rounded for 32 decimals */ #define PI 3.1415926535897932384626433832795 #define TWO_PI 6.283185307179586476925286766559 #define FOUR_PI 12.566370614359172953850573533118 @@ -54,158 +52,158 @@ #define EULER 0.57721566490153286060651209008241 #define FULL_ANGLE 360.0 -// determines the maximum number representable by size_t +/* determines the maximum number representable by size_t */ #ifndef SIZE_MAX #define SIZE_MAX ((size_t)-1) #endif -// sets the maximum box size; otherwise 'position' should be changed +/* sets the maximum box size; otherwise 'position' should be changed */ #define BOX_MAX USHRT_MAX -// sizes of some arrays -#define MAX_NMAT 15 // maximum number of different refractive indices (<256) -#define MAX_N_SH_PARMS 25 // maximum number of shape parameters -#define MAX_N_BEAM_PARMS 4 // maximum number of beam parameters - -// sizes of filenames and other strings -#define MAX_DIRNAME 300 // maximum length of dirname; increase THIS if any errors appear -#define MAX_FNAME_SH 100 // maximum length of filename (used for known names) -#define MAX_TMP_FNAME_SH 15 // maximum length of names of temporary files (short) -#define MAX_SYSTEM_CALL 10 // maximum string length of system call (itself) -#define MAX_WORD 10 // maximum length of a short word -#define MAX_LINE 50 // maximum length of a line - // size of buffer for reading lines (longer lines are handled robustly) -#define BUF_LINE 150 -#define MAX_PARAGRAPH 600 // maximum length of a paragraph (few lines) - -// derived sizes - // maximum string to create directory +/* sizes of some arrays */ +#define MAX_NMAT 15 /* maximum number of different refractive indices (<256) */ +#define MAX_N_SH_PARMS 25 /* maximum number of shape parameters */ +#define MAX_N_BEAM_PARMS 4 /* maximum number of beam parameters */ + +/* sizes of filenames and other strings */ + /* maximum length of dirname; increase THIS if any errors appear */ +#define MAX_DIRNAME 300 +#define MAX_FNAME_SH 100 /* maximum length of filename (used for known names) */ +#define MAX_TMP_FNAME_SH 15 /* maximum length of names of temporary files (short) */ +#define MAX_SYSTEM_CALL 10 /* maximum string length of system call (itself) */ +#define MAX_WORD 10 /* maximum length of a short word */ +#define MAX_LINE 50 /* maximum length of a line */ + /* size of buffer for reading lines (longer lines are handled robustly) */ +#define BUF_LINE 150 +#define MAX_PARAGRAPH 600 /* maximum length of a paragraph (few lines) */ +/* derived sizes */ + /* maximum string to create directory */ #define MAX_DIRSYS (MAX_DIRNAME + MAX_SYSTEM_CALL) - // maximum length of filename (including directory name) + /* maximum length of filename (including directory name) */ #define MAX_FNAME (MAX_DIRNAME + MAX_FNAME_SH) - // maximum length of temporary filename (including directory name) + /* maximum length of temporary filename (including directory name) */ #define MAX_TMP_FNAME (MAX_DIRNAME + MAX_TMP_FNAME_SH) - // maximum message that may include a filename (for PrintError) + /* maximum message that may include a filename (for PrintError) */ #define MAX_MESSAGE (MAX_FNAME + MAX_PARAGRAPH) - // maximum message that may include 2 filenames (for LogError) + /* maximum message that may include 2 filenames (for LogError) */ #define MAX_MESSAGE2 (2*MAX_FNAME + MAX_PARAGRAPH) -// widths of terminal used for output -#define DEF_TERM_WIDTH 80 // default -#define MIN_TERM_WIDTH 20 // ADDA never takes value less than that from environmental variables - -// shape types -#define SH_SPHERE 0 // sphere -#define SH_BOX 1 // box (may be rectangular) -#define SH_PRISMA 2 // prisma (triangular) -- not operational -#define SH_LINE 3 // line with width of one dipole -#define SH_COATED 4 // coated sphere -#define SH_SPHEREBOX 5 // sphere in a box -#define SH_RBC 6 // Red Blood Cell -#define SH_ELLIPSOID 7 // general ellipsoid -#define SH_SDISK_ROT 8 // disc cut of a sphere -- not operational -#define SH_CYLINDER 9 // cylinder -#define SH_READ 10 // read from file -#define SH_EGG 11 // egg -#define SH_CAPSULE 12 // capsule -#define SH_AXISYMMETRIC 13 // axisymmetric +/* widths of terminal used for output */ +#define DEF_TERM_WIDTH 80 /* default */ +#define MIN_TERM_WIDTH 20 /* no lesser value is accepted by ADDA from environmental variables */ + +/* shape types */ +#define SH_SPHERE 0 /* sphere */ +#define SH_BOX 1 /* box (may be rectangular) */ +#define SH_PRISMA 2 /* prisma (triangular) -- not operational */ +#define SH_LINE 3 /* line with width of one dipole */ +#define SH_COATED 4 /* coated sphere */ +#define SH_SPHEREBOX 5 /* sphere in a box */ +#define SH_RBC 6 /* Red Blood Cell */ +#define SH_ELLIPSOID 7 /* general ellipsoid */ +#define SH_SDISK_ROT 8 /* disc cut of a sphere -- not operational */ +#define SH_CYLINDER 9 /* cylinder */ +#define SH_READ 10 /* read from file */ +#define SH_EGG 11 /* egg */ +#define SH_CAPSULE 12 /* capsule */ /* TO ADD NEW SHAPE - * add a define starting with 'SH_' here; the number should be different from any others in this - * list. Add a descriptive comment. - */ - -// which way to calculate coupleconstant -#define POL_CM 0 // Clausius-Mossotti -#define POL_RR 1 // Radiative Reaction correction -#define POL_LDR 2 // Lattice Dispersion Relation -#define POL_CLDR 3 // Corrected Lattice Dispersion Relation -#define POL_FCD 4 // Filtered Coupled Dipoles -#define POL_SO 5 // Second Order formulation - -// how to calculate scattering quantities -#define SQ_DRAINE 0 // classical, as Draine -#define SQ_SO 1 // Second Order formulation - -// how to calculate interaction term -#define G_POINT_DIP 0 // as point dipoles -#define G_FCD 1 // Filtered Green's tensor (Filtered Coupled Dipoles) -#define G_FCD_ST 2 // quasi-static version of FCD -#define G_SO 3 // Second Order formulation - -// ldr constants -#define LDR_B1 1.8915316 -#define LDR_B2 -0.1648469 -#define LDR_B3 1.7700004 - -// 2nd_order constants -#define SO_B1 1.5867182 -#define SO_B2 0.13488017 -#define SO_B3 0.11895826 - -// two boundaries for separation between G_SO 'close', 'median', and 'far' -#define G_BOUND_CLOSE 1 // k*R^2/d < GB_CLOSE => 'close' -#define G_BOUND_MEDIAN 1 // k*R < GB_MEDIAN => 'median' - -// iterative methods; see iterative.c for info -#define IT_CGNR 0 -#define IT_BICGSTAB 1 -#define IT_BICG_CS 2 -#define IT_QMR_CS 3 - -// type of E field calculation -#define CE_NORMAL 0 // normal -#define CE_PARPER 1 // use symmetry to calculate both incident polarizations - // from one calculation of internal fields - -// path and size of tables + add a define starting with 'SH_' here; the number should be different from + any others in this list. Add a descriptive comment. */ + + +/* which way to calculate coupleconstant */ +#define POL_CM 0 /* Clausius Mossotti */ +#define POL_RR 1 /* Radiative Reaction correction */ +#define POL_LDR 2 /* Lattice Dispersion Relation */ +#define POL_CLDR 3 /* Corrected Lattice Dispersion Relation */ +#define POL_FCD 4 /* Filtered Coupled Dipoles */ +#define POL_SO 5 /* Second Order formulation */ + +/* how to calculate scattering quantities */ +#define SQ_DRAINE 0 /* classical, as Draine */ +#define SQ_SO 1 /* Second Order formulation */ + +/* how to calculate interaction term */ +#define G_POINT_DIP 0 /* as point dipoles */ +#define G_FCD 1 /* Filtered Green's tensor (Filtered Coupled Dipoles) */ +#define G_FCD_ST 2 /* Quasistatis version of FCD */ +#define G_SO 3 /* Second Order formulation */ + +/* ldr constants */ +#define LDR_B1 1.8915316 +#define LDR_B2 -0.1648469 +#define LDR_B3 1.7700004 + +/* 2nd_order constants */ +#define SO_B1 1.5867182 +#define SO_B2 0.13488017 +#define SO_B3 0.11895826 + +/* two boundaries for separation between G_SO 'close', 'median', and 'far' */ +#define G_BOUND_CLOSE 1 /* k*R^2/d < GB_CLOSE => 'close' */ +#define G_BOUND_MEDIAN 1 /* k*R < GB_MEDIAN => 'median' */ + +/* iterative methods; see iterative.c for info */ +#define IT_CGNR 0 +#define IT_BICGSTAB 1 +#define IT_BICG_CS 2 +#define IT_QMR_CS 3 + +/* type of E field calculation */ +#define CE_NORMAL 0 /* normal */ +#define CE_PARPER 1 /* use symmetry to calculate both incident polarizations + from one calculation of internal fields */ + +/* path and size of tables */ #define TAB_PATH "tables/" -#define TAB_FNAME(a) "t" #a "f.dat" // a is a number, e.g. TAB_FNAME(2) -> "t2f.dat" +#define TAB_FNAME(a) "t" #a "f.dat" /* a is a number, e.g. TAB_FNAME(2) -> "t2f.dat" */ #define TAB_SIZE 142 #define TAB_RMAX 10 -// beam types -#define B_PLANE 0 -#define B_LMINUS 1 -#define B_DAVIS3 2 -#define B_BARTON5 3 - -// types of scattering grid -#define SG_GRID 0 // grid of angles -#define SG_PAIRS 1 // set of independent pairs -// types of angles set -#define SG_RANGE 0 // range with uniformly spaced points -#define SG_VALUES 1 // any set of values - -// types of phi_integr (should be different one-bit numbers) -#define PHI_UNITY 1 // just integrate -#define PHI_COS2 2 // integrate with cos(2*phi) -#define PHI_SIN2 4 // integrate with sin(2*phi) -#define PHI_COS4 8 // integrate with cos(4*phi) -#define PHI_SIN4 16 // integrate with sin(4*phi) - -// ways to treat particle symmetries -#define SYM_AUTO 0 // automatic -#define SYM_NO 1 // do not take into account -#define SYM_ENF 2 // enforce - -// types of checkpoint (to save) -#define CHP_NONE 0 // do not save checkpoint -#define CHP_NORMAL 1 // save checkpoint if not finished in time and exit -#define CHP_REGULAR 2 // save checkpoints in regular time intervals (until finished or halted) -#define CHP_ALWAYS 3 /* save checkpoint either if finished or time elapsed - * and calculate all scattering quantities - */ -// return values for functions -#define CHP_EXIT -2 // exit after saving checkpoint - -// default values; other are specified in InitVariables (param.c) +/* beam types */ +#define B_PLANE 0 +#define B_LMINUS 1 +#define B_DAVIS3 2 +#define B_BARTON5 3 + +/* types of scattering grid */ +#define SG_GRID 0 /* grid of angles */ +#define SG_PAIRS 1 /* set of independent pairs */ +/* types of angles set */ +#define SG_RANGE 0 /* range with uniformly spaced points */ +#define SG_VALUES 1 /* any set of values */ + +/* types of phi_integr (should be different one-bit numbers) */ +#define PHI_UNITY 1 /* just integrate */ +#define PHI_COS2 2 /* integrate with cos(2*phi) */ +#define PHI_SIN2 4 /* integrate with sin(2*phi) */ +#define PHI_COS4 8 /* integrate with cos(4*phi) */ +#define PHI_SIN4 16 /* integrate with sin(4*phi) */ + +/* ways to treat particle symmetries */ +#define SYM_AUTO 0 /* automatic */ +#define SYM_NO 1 /* do not take into account */ +#define SYM_ENF 2 /* enforce */ + +/* types of checkpoint (to save) */ +#define CHP_NONE 0 /* do not save checkpoint */ +#define CHP_NORMAL 1 /* save checkpoint if not finished in time and exit */ +#define CHP_REGULAR 2 /* save checkpoints in regular time intervals + (until finished or halted) */ +#define CHP_ALWAYS 3 /* save checkpoint either if finished or time elapsed + and calculate all the scattering quantities */ + +/* return values for functions */ +#define CHP_EXIT -2 /* exit after saving checkpoint */ + +/* default values; other are specified in InitVariables (param.c) */ #define DEF_GRID (16*jagged) -#define MIN_AUTO_GRID 16 // minimum grid, when set from default dpl +#define MIN_AUTO_GRID 16 /* minimum grid, when set from default dpl */ -// numbers less than this value (compared to unity) are considered to be zero +/* numbers less than this value (compared to unity) are considered to be zero */ #define ROUND_ERR 1E-15 -// output and input file and directory names (can only be changed at compile time) +/* output and input file and dir names (can only be changed at compile time) */ #define F_EXPCOUNT "ExpCount" #define F_EXPCOUNT_LCK F_EXPCOUNT ".lck" #define F_CS "CrossSec" @@ -213,21 +211,20 @@ #define F_INTFLD "IntField" #define F_DIPPOL "DipPol" #define F_BEAM "IncBeam" -#define F_GRANS "granules" - // suffixes + /* suffixes */ #define F_XSUF "-X" #define F_YSUF "-Y" - // logs + /* logs */ #define F_LOG "log" -#define F_LOG_ERR "logerr.%d" // ringid as argument +#define F_LOG_ERR "logerr.%d" /* ringid as argument */ #define F_LOG_ORAVG "log_orient_avg" #define F_LOG_INT_CSCA "log_int_Csca" #define F_LOG_INT_ASYM "log_int_asym" - // log suffixes + /* log suffixes */ #define F_LOG_X "_x" #define F_LOG_Y "_y" #define F_LOG_Z "_z" - // Mueller files + /* mueller files */ #define F_MUEL "mueller" #define F_MUEL_SG "mueller_scatgrid" #define F_MUEL_INT "mueller_integr" @@ -235,42 +232,41 @@ #define F_MUEL_S2 "mueller_integr_s2" #define F_MUEL_C4 "mueller_integr_c4" #define F_MUEL_S4 "mueller_integr_s4" - // temporary files; used in printf with ringid as argument + /* temporary files; used in printf with ringid as argument */ #define F_BEAM_TMP "b%d.tmp" #define F_INTFLD_TMP "f%d.tmp" #define F_DIPPOL_TMP "p%d.tmp" #define F_GEOM_TMP "g%d.tmp" - // checkpoint files + /* checkpoint files */ #define F_CHP_LOG "chp.log" -#define F_CHP "chp.%d" // ringid as argument +#define F_CHP "chp.%d" /* ringid as argument */ -// default file and directory names; can be changed by command line options +/* default file and dir names; can be changed by command line options */ #define FD_ALLDIR_PARMS "alldir_params.dat" #define FD_AVG_PARMS "avg_params.dat" #define FD_SCAT_PARMS "scat_params.dat" #define FD_CHP_DIR "chpoint" -// number of components of D +/* number of components of D */ #define NDCOMP 6 -// shape formats; numbers should be nonnegative -#define SF_TEXT 0 // ADDA text format for one-domain particles -#define SF_TEXT_EXT 1 // ADDA text format for multi-domain particles -#define SF_DDSCAT 2 // DDSCAT 6.1 format (FRMFIL), produced by calltarget - -//************* Global Defines and Data structures (all for LogError) ***************** - -#define POSIT __FILE__,__LINE__ // position of the error in source code -// who definitions -#define ALL 0 // each processor may report this error -#define ONE 1 // only root processor reports an error - -// derived; for simplicity -#define ALL_POS ALL,POSIT -#define ONE_POS ONE,POSIT -// error codes -#define EC_ERROR 1 // error -#define EC_WARN 2 // warning -#define EC_INFO 3 // slight warning, that does not interfere at all with normal execution - -#endif // __const_h +/* shape formats; numbers should be nonnegative */ +#define SF_TEXT 0 /* ADDA text format for one-domain particles */ +#define SF_TEXT_EXT 1 /* ADDA text format for multi-domain particles */ +#define SF_DDSCAT 2 /* DDSCAT 6.1 format (FRMFIL), produced by calltarget */ + +/************** Global Defines and Data structures (all for LogError) *****************/ + +#define POSIT __FILE__,__LINE__ /* position of the error in source code */ +/* who definitions */ +#define ALL 0 /* each processor may report this error */ +#define ONE 1 /* only root processor reports an error */ + /* derived; for simplicity */ +#define ALL_POS ALL,POSIT +#define ONE_POS ONE,POSIT +/* error codes */ +#define EC_ERROR 1 /* error */ +#define EC_WARN 2 /* warning */ +#define EC_INFO 3 /* slight warning, that does not interfere at all with normal execution */ + +#endif /*__const_h*/ diff --git a/src/crosssec.c b/src/crosssec.c index b8a23081..07fb2d1c 100644 --- a/src/crosssec.c +++ b/src/crosssec.c @@ -1,6 +1,6 @@ /* FILE : crosssec.c * AUTH : Maxim Yurkin - * DESCR: All the functions to calculate scattering quantities (except Mueller matrix). + * DESCR: All the functions to calculate scattering qunatities (except Mueller matrix). * Functions to read different parameters from files. * Initialization of orientation of the particle. * @@ -25,1042 +25,1042 @@ #include "timing.h" #include "function.h" -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and initialized in calculator.c +/* defined and initialized in calculator.c */ extern double *E2_alldir,*E2_alldir_buffer; extern const doublecomplex cc[][3]; extern doublecomplex *expsX,*expsY,*expsZ; -// defined and initialized in GenerateB.c +/* defined and initialized in GenerateB.c */ extern const double beam_center_0[3]; -// defined and initialized in param.c +/* defined and initialized in param.c */ extern const double prop_0[3],incPolX_0[3],incPolY_0[3]; extern const int ScatRelation; -// defined and initialized in timing.c -extern TIME_TYPE Timing_EField_ad,Timing_comm_EField_ad,Timing_EField_sg,Timing_comm_EField_sg, -Timing_ScatQuan_comm; +/* defined and initialized in timing.c */ +extern TIME_TYPE Timing_EField_ad,Timing_comm_EField_ad, + Timing_EField_sg,Timing_comm_EField_sg,Timing_ScatQuan_comm; -// used in CalculateE.c +/* used in CalculateE.c */ Parms_1D phi_sg; -// used in calculator.c -Parms_1D parms_alpha; // parameters of integration over alpha -Parms_1D parms[2]; // parameters for integration over theta,phi or beta,gamma -angle_set beta_int,gamma_int,theta_int,phi_int; // sets of angles -// used in param.c -char avg_string[MAX_PARAGRAPH]; // string for output of function that reads averaging parameters -// used in Romberg.c -int full_al_range; // whether full range of alpha angle is used +/* used in calculator.c */ +Parms_1D parms_alpha; /* parameters of integration over alpha */ +Parms_1D parms[2]; /* parameters for integration over theta,phi or beta,gamma */ +angle_set beta_int,gamma_int,theta_int,phi_int; /* sets of angles */ +/* used in param.c */ +char avg_string[MAX_PARAGRAPH]; /* string for output of function that reads averaging parameters */ +/* used in Romberg.c */ +int full_al_range; /* whether full range of alpha angle is used */ -//===================================================================== +/*=====================================================================*/ INLINE int AlldirIndex(const int theta,const int phi) -// Convert the (theta,phi) couple into a linear array index + /* Convert the (theta,phi) couple into a linear array index */ { - return (theta*phi_int.N + phi); + return (theta*phi_int.N + phi); } -//===================================================================== +/*=====================================================================*/ void InitRotation (void) -/* initialize matrices used for reference frame transformation; based on Mishchenko M.I. - * "Calculation of the amplitude matrix for a nonspherical particle in a fixed orientation", - * Applied Optics 39(6):1026-1031. This is so-called zyz-notation or y-convention. - */ + /* initialize matrices used for reference frame transformation + based on Mishchenko,M.I. "Calculation of the amplitude matrix + for a nonspherical particle in a fixed orientation", + Applied Optics 39(6):1026-1031. This is so-called zyz-notation + or y-convention. */ { - double ca,sa,cb,sb,cg,sg; - double beta_matr[3][3]; - double alph,bet,gam; // in radians - - // initialization of angle values in radians - alph=Deg2Rad(alph_deg); - bet=Deg2Rad(bet_deg); - gam=Deg2Rad(gam_deg); - // calculation of rotation matrix - ca=cos(alph); - sa=sin(alph); - cb=cos(bet); - sb=sin(bet); - cg=cos(gam); - sg=sin(gam); - - beta_matr[0][0]=ca*cb*cg-sa*sg; - beta_matr[0][1]=sa*cb*cg+ca*sg; - beta_matr[0][2]=-sb*cg; - beta_matr[1][0]=-ca*cb*sg-sa*cg; - beta_matr[1][1]=-sa*cb*sg+ca*cg; - beta_matr[1][2]=sb*sg; - beta_matr[2][0]=ca*sb; - beta_matr[2][1]=sa*sb; - beta_matr[2][2]=cb; - // rotation of incident field - MatrVec(beta_matr,prop_0,prop); - MatrVec(beta_matr,incPolY_0,incPolY); - MatrVec(beta_matr,incPolX_0,incPolX); - if (beam_asym) MatrVec(beta_matr,beam_center_0,beam_center); + double ca,sa,cb,sb,cg,sg; + double beta_matr[3][3]; + double alph,bet,gam; /* in radians */ + + /* initialization of angle values in radians */ + alph=Deg2Rad(alph_deg); + bet=Deg2Rad(bet_deg); + gam=Deg2Rad(gam_deg); + /* calculation of rotation matrix */ + ca=cos(alph); + sa=sin(alph); + cb=cos(bet); + sb=sin(bet); + cg=cos(gam); + sg=sin(gam); + + beta_matr[0][0]=ca*cb*cg-sa*sg; + beta_matr[0][1]=sa*cb*cg+ca*sg; + beta_matr[0][2]=-sb*cg; + beta_matr[1][0]=-ca*cb*sg-sa*cg; + beta_matr[1][1]=-sa*cb*sg+ca*cg; + beta_matr[1][2]=sb*sg; + beta_matr[2][0]=ca*sb; + beta_matr[2][1]=sa*sb; + beta_matr[2][2]=cb; + /* rotation of incident field */ + MatrVec(beta_matr,prop_0,prop); + MatrVec(beta_matr,incPolY_0,incPolY); + MatrVec(beta_matr,incPolX_0,incPolX); + if (beam_asym) MatrVec(beta_matr,beam_center_0,beam_center); } -//===================================================================== -// currently not used +/*=====================================================================*/ + /* currently not used */ static int ReadLine(FILE *,const char *,char *,const int) ATT_UNUSED; -static int ReadLine(FILE *file,const char *fname, // opened file and filename - char *buf,const int buf_size) // buffer for line and its size -// reads the first uncommented line; returns 1 if EOF reached +static int ReadLine(FILE *file,const char *fname, /* opened file and filename */ + char *buf,const int buf_size) /* buffer for line and its size */ + /* reads the first uncommented line; returns 1 if EOF reached */ { - while (!feof(file)) { - fgets(buf,buf_size,file); - if (*buf!='#') { // if uncommented - if (strstr(buf,"\n")==NULL && !feof(file)) LogError(EC_ERROR,ONE_POS, - "Buffer overflow while reading '%s' (size of uncommented line > %d)", - fname,buf_size-1); - else return 0; // complete line is read - } // finish reading the commented line - else while (strstr(buf,"\n")==NULL && !feof(file)) fgets(buf,buf_size,file); - } - return 1; + while (!feof(file)) { + fgets(buf,buf_size,file); + if (*buf!='#') { /* if uncommented */ + if (strstr(buf,"\n")==NULL && !feof(file)) LogError(EC_ERROR,ONE_POS, + "Buffer overflow while reading '%s' (size of uncommented line > %d)", + fname,buf_size-1); + else return 0; /* complete line is read */ + } /* finish reading the commented line */ + else while (strstr(buf,"\n")==NULL && !feof(file)) fgets(buf,buf_size,file); + } + return 1; } -//===================================================================== +/*=====================================================================*/ -static void ReadLineStart(FILE *file,const char *fname, // opened file and filename - char *buf,const int buf_size, // buffer for line and its size - const char *start) // beginning of the line to search -// reads the first line that starts with 'start' +static void ReadLineStart(FILE *file,const char *fname, /* opened file and filename */ + char *buf,const int buf_size, /* buffer for line and its size */ + const char *start) /* beginning of the line to search */ + /* reads the first line that starts with 'start' */ { - while (!feof(file)) { - fgets(buf,buf_size,file); - if (strstr(buf,start)==buf) { // if correct beginning - if (strstr(buf,"\n")==NULL && !feof(file)) LogError(EC_ERROR,ONE_POS, - "Buffer overflow while reading '%s' (size of essential line > %d)", - fname,buf_size-1); - else return; // line found and fits into buffer - } // finish reading unmatched line - else while (strstr(buf,"\n")==NULL && !feof(file)) fgets(buf,buf_size,file); - } - LogError(EC_ERROR,ONE_POS,"String '%s' is not found (in correct place) in file '%s'", - start,fname); + while (!feof(file)) { + fgets(buf,buf_size,file); + if (strstr(buf,start)==buf) { /* if correct beginning */ + if (strstr(buf,"\n")==NULL && !feof(file)) LogError(EC_ERROR,ONE_POS, + "Buffer overflow while reading '%s' (size of essential line > %d)", + fname,buf_size-1); + else return; /* line found and fits into buffer */ + } /* finish reading unmatched line */ + else while (strstr(buf,"\n")==NULL && !feof(file)) fgets(buf,buf_size,file); + } + LogError(EC_ERROR,ONE_POS, + "String '%s' is not found (in correct place) in file '%s'",start,fname); } -//===================================================================== +/*=====================================================================*/ -INLINE void ScanDouble(FILE *file,const char *fname,char *buf,const int buf_size,const char *start, - double *res) -/* scans double value from a line starting with exactly 'start'; contains the same arguments as - * ReadLineStart function, plus pointer to where the result should be placed - */ +INLINE void ScanDouble(FILE *file,const char *fname, /* arguments of the ReadLineStart function */ + char *buf,const int buf_size,const char *start, /* ... */ + double *res) /* result */ + /* scans double value from a line starting with exactly 'start' */ { - ReadLineStart(file,fname,buf,buf_size,start); - if (sscanf(buf+strlen(start),"%lf",res)!=1) LogError(EC_ERROR,ONE_POS, - "Error reading value after '%s' in file '%s'",start,fname); + ReadLineStart(file,fname,buf,buf_size,start); + if (sscanf(buf+strlen(start),"%lf",res)!=1) LogError(EC_ERROR,ONE_POS, + "Error reading value after '%s' in file '%s'",start,fname); } -//===================================================================== +/*=====================================================================*/ -INLINE void ScanInt(FILE *file,const char *fname,char *buf,const int buf_size,const char *start, - int *res) -/* scans integer value from a line starting with exactly 'start'; contains the same arguments as - * ReadLineStart function, plus pointer to where the result should be placed - */ +INLINE void ScanInt(FILE *file,const char *fname, /* arguments of the ReadLineStart function */ + char *buf,const int buf_size,const char *start, /* ... */ + int *res) /* result */ + /* scans integer value from a line starting with exactly 'start' */ { - double tmp; - - ReadLineStart(file,fname,buf,buf_size,start); - if (sscanf(buf+strlen(start),"%lf",&tmp)!=1) LogError(EC_ERROR,ONE_POS, - "Error reading value after '%s' in file '%s'",start,fname); - if (tmpINT_MAX) LogError(EC_ERROR,ONE_POS, - "Value after '%s' in file '%s' is out of integer bounds",start,fname); - if (sscanf(buf+strlen(start),"%d",res)!=1) LogError(EC_ERROR,ONE_POS, - "Error reading value after '%s' in file '%s'",start,fname); -} - -//===================================================================== + double tmp; -INLINE void ScanSizet(FILE *file,const char *fname,char *buf,const int buf_size,const char *start, - size_t *res) -/* scans large integer value from a line starting with exactly 'start'; contains the same arguments - * as ReadLineStart function, plus pointer to where the result should be placed. Conversion from - * (unsigned long) is needed (to remove warnings) because %z printf argument is not yet supported by - * all target compiler environments - */ + ReadLineStart(file,fname,buf,buf_size,start); + if (sscanf(buf+strlen(start),"%lf",&tmp)!=1) LogError(EC_ERROR,ONE_POS, + "Error reading value after '%s' in file '%s'",start,fname); + if (tmp INT_MAX) LogError(EC_ERROR,ONE_POS, + "Value after '%s' in file '%s' is out of integer bounds",start,fname); + if (sscanf(buf+strlen(start),"%d",res)!=1) LogError(EC_ERROR,ONE_POS, + "Error reading value after '%s' in file '%s'",start,fname); -{ - double tmp; - unsigned long res_tmp; - - ReadLineStart(file,fname,buf,buf_size,start); - if (sscanf(buf+strlen(start),"%lf",&tmp)!=1) LogError(EC_ERROR,ONE_POS, - "Error reading value after '%s' in file '%s'",start,fname); - if (tmp<0 || tmp>SIZE_MAX) LogError(EC_ERROR,ONE_POS, - "Value after '%s' in file '%s' is out of size_t bounds",start,fname); - if (sscanf(buf+strlen(start),"%lu",&res_tmp)!=1) LogError(EC_ERROR,ONE_POS, - "Error reading value after '%s' in file '%s'",start,fname); - *res=(size_t)res_tmp; } -//===================================================================== +/*=====================================================================*/ -INLINE void ScanString(FILE *file,const char *fname,char *buf,const int buf_size,const char *start, - char *res) -/* scans string value from a line starting with exactly 'start'; contains the same arguments as - * ReadLineStart function, plus pointer to where the result should be placed - */ +INLINE void ScanString(FILE *file,const char *fname, /* arguments of the ReadLineStart function */ + char *buf,const int buf_size,const char *start, /* ... */ + char *res) /* result */ + /* scans string value from a line starting with exactly 'start' */ { - ReadLineStart(file,fname,buf,buf_size,start); - if (sscanf(buf+strlen(start),"%s",res)!=1) LogError(EC_ERROR,ONE_POS, - "Error reading value after '%s' in file '%s'",start,fname); + ReadLineStart(file,fname,buf,buf_size,start); + if (sscanf(buf+strlen(start),"%s",res)!=1) LogError(EC_ERROR,ONE_POS, + "Error reading value after '%s' in file '%s'",start,fname); } -//===================================================================== +/*=====================================================================*/ static void ScanIntegrParms( - FILE *file,const char *fname, // opened file and filename - angle_set *a, // pointer to angle set - Parms_1D *b, // pointer to parameters of integration - const int ifcos, // if space angles equally in cos - char *buf,char* temp, // 2 buffers - const int buf_size) // and their size -// scan integration parameters for angles from file + FILE *file,const char *fname, /* opened file and filename */ + angle_set *a, /* pointer to angle set */ + Parms_1D *b, /* pointer to parameters of integration */ + const int ifcos, /* if space angles equally in cos */ + char *buf,char* temp, /* 2 buffers */ + const int buf_size) /* and their size */ + /* scan integration parameters for angles from file */ { - size_t i; - double unit; - - // scan file - ScanDouble(file,fname,buf,buf_size,"min=",&(a->min)); - ScanDouble(file,fname,buf,buf_size,"max=",&(a->max)); - ScanInt(file,fname,buf,buf_size,"Jmin=",&(b->Jmin)); - ScanInt(file,fname,buf,buf_size,"Jmax=",&(b->Jmax)); - ScanDouble(file,fname,buf,buf_size,"eps=",&(b->eps)); - ScanString(file,fname,buf,buf_size,"equiv=",temp); - if (strcmp(temp,"true")==0) b->equival=TRUE; - else if (strcmp(temp,"false")==0) b->equival=FALSE; - else LogError(EC_ERROR,ONE_POS,"Wrong argument of 'equiv' option in file %s",fname); - ScanString(file,fname,buf,buf_size,"periodic=",temp); - if (strcmp(temp,"true")==0) b->periodic=TRUE; - else if (strcmp(temp,"false")==0) b->periodic=FALSE; - else LogError(EC_ERROR,ONE_POS,"Wrong argument of 'periodic' option in file %s",fname); - - // fill all parameters - if (a->min==a->max) { - a->N=b->Grid_size=1; - b->Jmax=1; - } - else { - // consistency check - if (a->min>a->max) LogError(EC_ERROR,ONE_POS, - "Wrong range (min=%g, max=%g) in file %s (max must be >= min)",a->min,a->max,fname); - if (b->JmaxJmin) LogError(EC_ERROR,ONE_POS, - "Wrong Jmax (%d) in file %s; it must be >= Jmin (%d)",b->Jmax,fname,b->Jmin); - if (b->Jmin<1) LogError(EC_ERROR,ONE_POS, - "Wrong Jmin (%d) in file %s (must be >=1)",b->Jmin,fname); - if (b->eps<0) LogError(EC_ERROR,ONE_POS, - "Wrong eps (%g) in file %s (must be >=0)",b->eps,fname); - if (b->Jmax >= (int)(8*sizeof(int))) LogError(EC_ERROR,ONE_POS, - "Too large Jmax(%d) in file %s, it will cause integer overflow",b->Jmax,fname); - - a->N=b->Grid_size=(1 << b->Jmax) + 1; - if (b->equival && a->N>1) (a->N)--; - } - // initialize points of integration - MALLOC_VECTOR(a->val,double,a->N,ALL); - memory += a->N*sizeof(double); - - if (ifcos) { // make equal intervals in cos(angle) - // consistency check - if (a->min<0) LogError(EC_ERROR,ONE_POS, - "Wrong min (%g) in file %s (must be >=0 for this angle)",a->min,fname); - if (a->max>180) LogError(EC_ERROR,ONE_POS, - "Wrong max (%g) in file %s (must be <=180 for this angle)",a->max,fname); - b->min=cos(Deg2Rad(a->max)); - b->max=cos(Deg2Rad(a->min)); - if (fabs(b->min)min=0; // just for convenience of display in log file - if (fabs(b->max)max=0; - if (b->Grid_size==1) a->val[0]=a->min; - else { - unit = (b->max - b->min)/(b->Grid_size-1); - for (i=0;iN;i++) a->val[i] = Rad2Deg(acos(b->min+unit*i)); - } - } - else { // make equal intervals in angle - b->min=Deg2Rad(a->min); - b->max=Deg2Rad(a->max); - if (b->Grid_size==1) a->val[0]=a->min; - else { - unit = (a->max - a->min)/(b->Grid_size-1); - for (i=0;iN;i++) a->val[i] = a->min + unit*i; - } - } + size_t i; + double unit; + + /* scan file */ + ScanDouble(file,fname,buf,buf_size,"min=",&(a->min)); + ScanDouble(file,fname,buf,buf_size,"max=",&(a->max)); + ScanInt(file,fname,buf,buf_size,"Jmin=",&(b->Jmin)); + ScanInt(file,fname,buf,buf_size,"Jmax=",&(b->Jmax)); + ScanDouble(file,fname,buf,buf_size,"eps=",&(b->eps)); + ScanString(file,fname,buf,buf_size,"equiv=",temp); + if (strcmp(temp,"true")==0) b->equival=TRUE; + else if (strcmp(temp,"false")==0) b->equival=FALSE; + else LogError(EC_ERROR,ONE_POS,"Wrong argument of 'equiv' option in file %s",fname); + ScanString(file,fname,buf,buf_size,"periodic=",temp); + if (strcmp(temp,"true")==0) b->periodic=TRUE; + else if (strcmp(temp,"false")==0) b->periodic=FALSE; + else LogError(EC_ERROR,ONE_POS,"Wrong argument of 'periodic' option in file %s",fname); + + /* fill all parameters */ + if (a->min==a->max) { + a->N=b->Grid_size=1; + b->Jmax=1; + } + else { + /* consistency check */ + if (a->min>a->max) LogError(EC_ERROR,ONE_POS, + "Wrong range (min=%g, max=%g) in file %s (max must be >= min)",a->min,a->max,fname); + if (b->JmaxJmin) LogError(EC_ERROR,ONE_POS, + "Wrong Jmax (%d) in file %s; it must be >= Jmin (%d)",b->Jmax,fname,b->Jmin); + if (b->Jmin<1) LogError(EC_ERROR,ONE_POS, + "Wrong Jmin (%d) in file %s (must be >=1)",b->Jmin,fname); + if (b->eps<0) LogError(EC_ERROR,ONE_POS, + "Wrong eps (%g) in file %s (must be >=0)",b->eps,fname); + if (b->Jmax >= (int)(8*sizeof(int))) LogError(EC_ERROR,ONE_POS, + "Too large Jmax(%d) in file %s, it will cause integer overflow",b->Jmax,fname); + + a->N=b->Grid_size=(1 << b->Jmax) + 1; + if (b->equival && a->N>1) (a->N)--; + } + /* initialize points of integration */ + MALLOC_VECTOR(a->val,double,a->N,ALL); + memory += a->N*sizeof(double); + + if (ifcos) { /* make equal intervals in cos(angle) */ + /* consistency check */ + if (a->min<0) LogError(EC_ERROR,ONE_POS, + "Wrong min (%g) in file %s (must be >=0 for this angle)",a->min,fname); + if (a->max>180) LogError(EC_ERROR,ONE_POS, + "Wrong max (%g) in file %s (must be <=180 for this angle)",a->max,fname); + b->min=cos(Deg2Rad(a->max)); + b->max=cos(Deg2Rad(a->min)); + if (fabs(b->min)min=0; /* just for convenience of display in log file */ + if (fabs(b->max)max=0; + if (b->Grid_size==1) a->val[0]=a->min; + else { + unit = (b->max - b->min)/(b->Grid_size-1); + for (i=0;iN;i++) a->val[i] = Rad2Deg(acos(b->min+unit*i)); + } + } + else { /* make equal intervals in angle */ + b->min=Deg2Rad(a->min); + b->max=Deg2Rad(a->max); + if (b->Grid_size==1) a->val[0]=a->min; + else { + unit = (a->max - a->min)/(b->Grid_size-1); + for (i=0;iN;i++) a->val[i] = a->min + unit*i; + } + } } -//===================================================================== +/*=====================================================================*/ -static int ScanAngleSet(FILE *file,const char *fname, // opened file and filename - angle_set *a, // pointers to angle set - char *buf,char *temp, // 2 buffers - const int buf_size) // and their size -// scan range or set of angles (theta or phi) from file (used for scat_grid) +static int ScanAngleSet(FILE *file,const char *fname, /* opened file and filename */ + angle_set *a, /* pointers to angle set */ + char *buf,char *temp, /* 2 buffers */ + const int buf_size) /* and their size */ + /* scan range or set of angles (theta or phi) from file (used for scat_grid) */ { - size_t i; - double unit; - - ScanString(file,fname,buf,buf_size,"type=",temp); - ScanSizet(file,fname,buf,buf_size,"N=",&(a->N)); - // initialize angle array - MALLOC_VECTOR(a->val,double,a->N,ALL); - memory += a->N*sizeof(double); - - if (strcmp(temp,"range")==0) { - ScanDouble(file,fname,buf,buf_size,"min=",&(a->min)); - ScanDouble(file,fname,buf,buf_size,"max=",&(a->max)); - if (a->min>a->max) LogError(EC_ERROR,ONE_POS, - "Wrong range (min=%g, max=%g) in file %s (max must be >= min)",a->min,a->max,fname); - if (a->N==1) a->val[0]=(a->max + a->min)/2; - else { - unit = (a->max - a->min)/(a->N - 1); - for (i=0;iN;i++) a->val[i] = a->min + unit*i; - } - return SG_RANGE; - } - else if (strcmp(temp,"values")==0) { - ReadLineStart(file,fname,buf,buf_size,"values="); - for (i=0;iN;i++) { - fgets(buf,buf_size,file); - if (strstr(buf,"\n")==NULL && !feof(file)) LogError(EC_ERROR,ONE_POS, - "Buffer overflow while scanning lines in file '%s' (line size > %d)", - fname,buf_size-1); - if (sscanf(buf,"%lf\n",a->val+i)!=1) LogError(EC_ERROR,ONE_POS, - "Failed scanning values from line '%s' in file '%s'",buf,fname); - } - return SG_VALUES; - } - else LogError(EC_ERROR,ONE_POS,"Unknown type '%s' in file '%s'",temp,fname); - // not actually reached - return -1; + size_t i; + double unit; + int value; + + ScanString(file,fname,buf,buf_size,"type=",temp); + ScanInt(file,fname,buf,buf_size,"N=",&value); + if (value<=0) LogError(EC_ERROR,ONE_POS, + "Number of angles in file '%s' (after 'N=') must be positive",fname); + else a->N=value; + /* initialize angle array */ + MALLOC_VECTOR(a->val,double,a->N,ALL); + memory += a->N*sizeof(double); + + if (strcmp(temp,"range")==0) { + ScanDouble(file,fname,buf,buf_size,"min=",&(a->min)); + ScanDouble(file,fname,buf,buf_size,"max=",&(a->max)); + if (a->min>a->max) LogError(EC_ERROR,ONE_POS, + "Wrong range (min=%g, max=%g) in file %s (max must be >= min)",a->min,a->max,fname); + if (a->N==1) a->val[0]=(a->max + a->min)/2; + else { + unit = (a->max - a->min)/(a->N - 1); + for (i=0;iN;i++) a->val[i] = a->min + unit*i; + } + return SG_RANGE; + } + else if (strcmp(temp,"values")==0) { + ReadLineStart(file,fname,buf,buf_size,"values="); + for (i=0;iN;i++) { + fgets(buf,buf_size,file); + if (strstr(buf,"\n")==NULL && !feof(file)) LogError(EC_ERROR,ONE_POS, + "Buffer overflow while scanning lines in file '%s' (line size > %d)",fname,buf_size-1); + if (sscanf(buf,"%lf\n",a->val+i)!=1) LogError(EC_ERROR,ONE_POS, + "Failed scanning values from line '%s' in file '%s'",buf,fname); + } + return SG_VALUES; + } + else LogError(EC_ERROR,ONE_POS,"Unknown type '%s' in file '%s'",temp,fname); + /* not actually reached */ + return -1; } -//===================================================================== +/*=====================================================================*/ void ReadAvgParms(const char *fname) -// read parameters of orientation averaging from a file + /* read parameters of orientation averaging from a file */ { - FILE *input; - char buf[BUF_LINE],temp[BUF_LINE]; - - // open file - input=FOpenErr(fname,"r",ALL_POS); - //scan file - ReadLineStart(input,fname,buf,BUF_LINE,"alpha:"); - ScanIntegrParms(input,fname,&alpha_int,&parms_alpha,FALSE,buf,temp,BUF_LINE); - full_al_range=fabs(alpha_int.max-alpha_int.min-FULL_ANGLE) %d)", - fname,BUF_LINE-1); - if (sscanf(buf,"%lf %lf\n",angles.theta.val+i,angles.phi.val+i)!=2) LogError(EC_ERROR, - ONE_POS,"Failed scanning values from line '%s' in file '%s'",buf,fname); - } - } - else LogError(EC_ERROR,ONE_POS,"Unknown global_type '%s' in file '%s'",temp,fname); - // close file - FCloseErr(input,fname,ALL_POS); - /* print info; conversions to (unsigned long) are needed (to remove warnings) because %z printf - * argument is not yet supported by all target compiler environmets - */ - if (ringid==ROOT) { - fprintf(logfile,"\nScattered field is calculated for multiple directions\n"); - if (angles.type==SG_GRID) { - if (theta_type==SG_RANGE) - fprintf(logfile,"theta: from %g to %g in %lu steps\n",angles.theta.min, - angles.theta.max,(unsigned long)angles.theta.N); - else if (theta_type==SG_VALUES) - fprintf(logfile,"theta: %lu given values\n",(unsigned long)angles.theta.N); - if (phi_type==SG_RANGE) { - fprintf(logfile,"phi: from %g to %g in %lu steps\n",angles.phi.min,angles.phi.max, - (unsigned long)angles.phi.N); - if (phi_integr) fprintf(logfile,"(Mueller matrix is integrated over phi)\n"); - } - else if (phi_type==SG_VALUES) - fprintf(logfile,"phi: %lu given values\n",(unsigned long)angles.phi.N); - } - else if (angles.type==SG_PAIRS) - fprintf(logfile,"Total %lu given (theta,phi) pairs\n",(unsigned long)angles.N); - fprintf(logfile,"\n"); - } - D("ReadScatGridParms finished"); + FILE *input; + char buf[BUF_LINE],temp[BUF_LINE]; + int theta_type,phi_type,value; + size_t i; + + /* open file */ + input=FOpenErr(fname,"r",ALL_POS); + /* scan file */ + ScanString(input,fname,buf,BUF_LINE,"global_type=",temp); + if (strcmp(temp,"grid")==0) { + angles.type = SG_GRID; + ReadLineStart(input,fname,buf,BUF_LINE,"theta:"); + theta_type=ScanAngleSet(input,fname,&(angles.theta),buf,temp,BUF_LINE); + if (phi_integr) { + ReadLineStart(input,fname,buf,BUF_LINE,"phi_integr:"); + ScanIntegrParms(input,fname,&(angles.phi),&phi_sg,FALSE,buf,temp,BUF_LINE); + phi_type = SG_RANGE; + } + else { + ReadLineStart(input,fname,buf,BUF_LINE,"phi:"); + phi_type=ScanAngleSet(input,fname,&(angles.phi),buf,temp,BUF_LINE); + } + angles.N=MultOverflow(angles.theta.N,angles.phi.N,ONE_POS,"angles.N");; + } + else if (strcmp(temp,"pairs")==0) { + if (phi_integr) + LogError(EC_ERROR,ONE_POS,"Integration over phi can't be done with 'global_type=pairs'"); + angles.type = SG_PAIRS; + ScanInt(input,fname,buf,BUF_LINE,"N=",&value); + if (value<=0) LogError(EC_ERROR,ONE_POS, + "Number of angle pairs in file '%s' (after 'N=') must be positive",fname); + else angles.N=value; + angles.theta.N=angles.phi.N=angles.N; + /* malloc angle arrays */ + MALLOC_VECTOR(angles.theta.val,double,angles.N,ALL); + MALLOC_VECTOR(angles.phi.val,double,angles.N,ALL); + memory += 2*angles.N*sizeof(double); + + ReadLineStart(input,fname,buf,BUF_LINE,"pairs="); + for (i=0;i %d)",fname,BUF_LINE-1); + if (sscanf(buf,"%lf %lf\n",angles.theta.val+i,angles.phi.val+i)!=2) + LogError(EC_ERROR,ONE_POS,"Failed scanning values from line '%s' in file '%s'",buf,fname); + } + } + else LogError(EC_ERROR,ONE_POS,"Unknown global_type '%s' in file '%s'",temp,fname); + /* close file */ + FCloseErr(input,fname,ALL_POS); + /* print info */ + if (ringid==ROOT) { + fprintf(logfile,"\nScattered field is calculated for multiple directions\n"); + if (angles.type==SG_GRID) { + if (theta_type==SG_RANGE) + fprintf(logfile,"theta: from %g to %g in %u steps\n", + angles.theta.min,angles.theta.max,angles.theta.N); + else if (theta_type==SG_VALUES) + fprintf(logfile,"theta: %u given values\n",angles.theta.N); + if (phi_type==SG_RANGE) { + fprintf(logfile,"phi: from %g to %g in %u steps\n", + angles.phi.min,angles.phi.max,angles.phi.N); + if (phi_integr) fprintf(logfile,"(Mueller matrix is integrated over phi)\n"); + } + else if (phi_type==SG_VALUES) + fprintf(logfile,"phi: %u given values\n",angles.phi.N); + } + else if (angles.type==SG_PAIRS) + fprintf(logfile,"Total %u given (theta,phi) pairs\n",angles.N); + fprintf(logfile,"\n"); + } + D("ReadScatGridParms finished"); } -//=====================================================================*/ +/*=====================================================================*/ -void CalcField (doublecomplex *ebuff, // where to write calculated scattering amplitude - const double *n) // scattering direction -/* Near-optimal routine to compute the scattered fields at one specific angle (more exactly - - * scattering amplitude); Specific optimization are possible when e.g. n[0]=0 for scattering in - * yz-plane, however in this case it is very improbable that the routine will become a bottleneck. - * The latter happens mostly for cases, when grid of scattering angles is used with only small - * fraction of n, allowing simplifications. - */ +void CalcField (doublecomplex *ebuff, /* where to write calculated scattering amplitude */ + const double *n) /* scattering direction */ + /* Near-optimal routine to compute the scattered fields at one specific + angle (more exactly - scattering amplitude); + Specific optimization are possible when e.g. n[0]=0 for scattering in yz-plane, however in + this case it is very unprobable that the routine will become a bottleneck. The latter happens + mostly for cases, when grid of scattering angles is used with only small fraction of n, + allowing simplifications. */ { - double kkk; - doublecomplex a,m2,dpr; - doublecomplex sum[3],tbuff[3],tmp; - int i; - unsigned short ix,iy1,iy2,iz1,iz2; - size_t j,jjj; - double temp, na; - doublecomplex mult_mat[MAX_NMAT]; - const int scat_avg=TRUE; - - if (ScatRelation==SQ_SO) { - // !!! this should never happen - if (anisotropy) LogError(EC_ERROR,ONE_POS,"Incompatibility error in CalcField"); - // calculate correction coefficient - if (scat_avg) na=0; - else na=DotProd(n,prop); - temp=kd*kd/24; - for(i=0;i materialT; b) DipoleCoord -> rdipT; - * c) pvec -> pT - */ - // initialize local_nvoid_d0 and local_nvoid_d1 - MALLOC_VECTOR(nvoid_array,double,nprocs,ALL); - nvoid_array[ringid]=local_nvoid_Ndip; - AllGather(nvoid_array+ringid,nvoid_array,double_type,nprocs); - local_nvoid_d0=0; - for (i=0;i materialT + b) DipoleCoord -> rdipT + c) pvec -> pT + */ + /* initialize local_nvoid_d0 and local_nvoid_d1 */ + MALLOC_VECTOR(nvoid_array,double,nprocs,ALL); + nvoid_array[ringid]=local_nvoid_Ndip; + AllGather(nvoid_array+ringid,nvoid_array,double_type,nprocs); + local_nvoid_d0=0; + for (i=0;i @@ -24,1140 +24,1137 @@ #include "function.h" #ifdef FFTW3 -# include -/* define level of planning for usual and Dmatrix (DM) FFT: FFTW_ESTIMATE (heuristics), - * FFTW_MEASURE (default), FTW_PATIENT, or FFTW_EXHAUSTIVE - */ -# define PLAN_FFTW FFTW_MEASURE -# define PLAN_FFTW_DM FFTW_ESTIMATE +# include +/* define level of planning for usual and Dmatrix (DM) FFT */ +/* FFTW_ESTIMATE (heuristics), FFTW_MEASURE (def), FTW_PATIENT, or FFTW_EXHAUSTIVE */ +# define PLAN_FFTW FFTW_MEASURE +# define PLAN_FFTW_DM FFTW_ESTIMATE #endif -// for transpose YZ +/* for transpose YZ */ #define TR_BLOCK 64 -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined ant initialized in calculator.c +/* defined ant initialized in calculator.c */ extern const double *tab1,*tab2,*tab3,*tab4,*tab5,*tab6,*tab7,*tab8,*tab9,*tab10; extern const int **tab_index; -// defined and initialized in timing.c +/* defined and initialized in timing.c */ extern TIME_TYPE Timing_FFT_Init,Timing_Dm_Init; -// used in matvec.c -doublecomplex *Dmatrix; // holds FFT of the interaction matrix -doublecomplex *Xmatrix; // holds input vector (on expanded grid) to matvec -doublecomplex *slices; // used in inner cycle of matvec - holds 3 components (for fixed x) -doublecomplex *slices_tr; // additional storage space for slices to accelerate transpose -size_t DsizeY,DsizeZ,DsizeYZ; // size of the 'matrix' D +/* used in matvec.c */ +doublecomplex *Dmatrix; /* holds FFT of the interaction matrix */ +doublecomplex *Xmatrix; /* holds input vector (on expanded grid) to matvec */ +doublecomplex *slices; /* used in inner cycle of matvec - holds 3 components (for fixed x) */ +doublecomplex *slices_tr; /* additional storage space for slices to accelerate transpose */ +size_t DsizeY,DsizeZ,DsizeYZ; /* size of the 'matrix' D */ -// used in comm.c -double *BT_buffer, *BT_rbuffer; // buffers for BlockTranspose +/* used in comm.c */ +double *BT_buffer, *BT_rbuffer; /* buffers for BlockTranspose */ -// LOCAL VARIABLES +/* LOCAL VARIABLES */ -// D2 matrix and its two slices; used only temporary for InitDmatrix +/* D2 matrix and its two slices; used only temporary for InitDmatrix */ static doublecomplex *slice,*slice_tr,*D2matrix; -static size_t D2sizeX,D2sizeY,D2sizeZ; // size of the 'matrix' D2 -static size_t blockTr=TR_BLOCK; // block size for TransposeYZ; see fft.h -static int weird_nprocs; // whether weird number of processors is used +static size_t D2sizeX,D2sizeY,D2sizeZ; /* size of the 'matrix' D2 */ +static size_t blockTr=TR_BLOCK; /* block size for TransposeYZ; see fft.h */ +static int weird_nprocs; /* whether weird number of processors is used */ #ifdef FFTW3 -// FFTW3 plans: f - FFT_FORWARD; b - FFT_BACKWARD -static fftw_plan planXf,planXb,planYf,planYb,planZf,planZb,planXf_Dm,planYf_Dm,planZf_Dm; + /* FFTW3 plans: f - FFT_FORWARD; b - FFT_BACKWARD */ + static fftw_plan planXf,planXb,planYf,planYb,planZf,planZb,planXf_Dm,planYf_Dm,planZf_Dm; #elif defined(FFT_TEMPERTON) # define IFAX_SIZE 20 -static double *trigsX,*trigsY,*trigsZ,*work; // arrays for Temperton FFT -static int ifaxX[IFAX_SIZE],ifaxY[IFAX_SIZE],ifaxZ[IFAX_SIZE]; -// Fortran routines from cfft99D.f -void cftfax_(const int *nn,int *ifax,double *trigs); -void cfft99_(double *data,double *_work,const double *trigs,const int *ifax,const int *inc, - const int *jump,const int *nn,const int *lot,const int *isign); + static double *trigsX,*trigsY,*trigsZ,*work; /* arrays for Temperton FFT */ + static int ifaxX[IFAX_SIZE],ifaxY[IFAX_SIZE],ifaxZ[IFAX_SIZE]; + /* Fortran routines from cfft99D.f */ + void cftfax_(const int *nn,int *ifax,double *trigs); + void cfft99_(double *data,double *_work,const double *trigs,const int *ifax,const int *inc, + const int *jump,const int *nn,const int *lot,const int *isign); #endif -// EXTERNAL FUNCTIONS +/* EXTERNAL FUNCTIONS */ -// sinint.c +/* sinint.c */ void cisi(double x,double *ci,double *si); -//============================================================ +/*============================================================*/ INLINE size_t IndexDmatrix(const size_t x,size_t y,size_t z) -// index D matrix to store final result + /* index D matrix to store final result */ { - if (y>=DsizeY) y=gridY-y; - if (z>=DsizeZ) z=gridZ-z; + if (y>=DsizeY) y=gridY-y; + if (z>=DsizeZ) z=gridZ-z; - return(NDCOMP*(x*DsizeYZ+z*DsizeY+y)); + return(NDCOMP*(x*DsizeYZ+z*DsizeY+y)); } -//============================================================ +/*============================================================*/ INLINE size_t IndexGarbledD(const size_t x,int y,int z,const size_t lengthN) -// index D2 matrix after BlockTranspose + /* index D2 matrix after BlockTranspose */ { - if (y<0) y+=D2sizeY; - if (z<0) z+=D2sizeZ; + if (y<0) y+=D2sizeY; + if (z<0) z+=D2sizeZ; #ifdef PARALLEL - return(((z%lengthN)*D2sizeY+y)*gridX+(z/lengthN)*local_Nx+x%local_Nx); + return(((z%lengthN)*D2sizeY+y)*gridX+(z/lengthN)*local_Nx+x%local_Nx); #else - return((z*D2sizeY+y)*gridX+x); + return((z*D2sizeY+y)*gridX+x); #endif } -//============================================================ +/*============================================================*/ INLINE size_t IndexD2matrix(int x,int y,int z,const int nnn) -// index D2 matrix to store calculated elements + /* index D2 matrix to store calculate elements */ { - if (x<0) x+=gridX; - if (y<0) y+=D2sizeY; - // if (z<0) z+=D2sizeZ; - return(((z-nnn*local_z0)*D2sizeY+y)*gridX+x); + if (x<0) x+=gridX; + if (y<0) y+=D2sizeY; +/* if (z<0) z+=D2sizeZ; */ + return(((z-nnn*local_z0)*D2sizeY+y)*gridX+x); } -//============================================================ +/*============================================================*/ INLINE size_t IndexSliceD2matrix(int y,int z) -// index slice of D2 matrix + /* index slice of D2 matrix */ { - if (y<0) y+=gridY; - if (z<0) z+=gridZ; + if (y<0) y+=gridY; + if (z<0) z+=gridZ; - return(y*gridZ+z); + return(y*gridZ+z); } -//============================================================ +/*============================================================*/ INLINE size_t IndexSlice_zyD2matrix(const size_t y,const size_t z) -// index transposed slice of D2 matrix + /* index transposed slice of D2 matrix */ { - return (z*gridY+y); + return (z*gridY+y); } -//============================================================ +/*============================================================*/ void TransposeYZ(const int direction) -// optimized routine to transpose y and z; forward: slices->slices_tr; backward: slices_tr->slices + /* optimised routine to transpose y and z + forward: slices -> slices_tr + backward: slices_tr -> slices */ { - size_t y,z,Y,Z,y1,y2,z1,z2,i,j,y0,z0,Xcomp; - doublecomplex *t0,*t1,*t2,*t3,*t4,*w0,*w1,*w2,*w3; - - if (direction==FFT_FORWARD) { - Y=gridY; - Z=gridZ; - w0=slices; - t0=slices_tr-Y; - } - else { // direction==FFT_BACKWARD - Y=gridZ; - Z=gridY; - w0=slices_tr; - t0=slices-Y; - } - - y1=Y/blockTr; - y2=Y%blockTr; - z1=Z/blockTr; - z2=Z%blockTr; - - for(Xcomp=0;Xcomp<3;Xcomp++) { - w1=w0+Xcomp*gridYZ; - t1=t0+Xcomp*gridYZ; - for(i=0;i<=y1;i++) { - if (i==y1) y0=y2; - else y0=blockTr; - w2=w1; - t2=t1; - for(j=0;j<=z1;j++) { - if (j==z1) z0=z2; - else z0=blockTr; - w3=w2; - t3=t2; - for (y=0;y trans + /* optimised routine to transpose y and z for Dmatrix: data -> trans */ { - size_t y,z,Y,Z,y1,y2,z1,z2,i,j,y0,z0; - doublecomplex *t1,*t2,*t3,*t4,*w1,*w2,*w3; - - Y=gridY; - Z=gridZ; - - y1=Y/blockTr; - y2=Y%blockTr; - z1=Z/blockTr; - z2=Z%blockTr; - - w1=data; - t1=trans-Y; - - for(i=0;i<=y1;i++) { - if (i==y1) y0=y2; - else y0=blockTr; - w2=w1; - t2=t1; - for(j=0;j<=z1;j++) { - if (j==z1) z0=z2; - else z0=blockTr; - w3=w2; - t3=t2; - for (y=0;y=x divisible by 2,3,5 only (if FFTW3 7 and one of 11 or 13 are allowed), - * and also divisible by 2 and divis. If weird_nprocs is used, only the latter condition is required - */ + /* find the first number >=x divisible by 2,3,5 only, + (if FFTW3 7 and one of 11 or 13 are allowed) + and divisible by 2 and divis + if weird_nprocs is used, only the latter condition is required */ { - int y; - - if (weird_nprocs) { - if (!IS_EVEN(divis)) divis*=2; - return (divis*((x+divis-1)/divis)); - } - else while (TRUE) { - y=x; - while (y%2==0) y/=2; - while (y%3==0) y/=3; - while (y%5==0) y/=5; + int y; + + if (weird_nprocs) { + if (divis%2!=0) divis*=2; + return (divis*((x+divis-1)/divis)); + } + else while (TRUE) { + y=x; + while (y%2==0) y/=2; + while (y%3==0) y/=3; + while (y%5==0) y/=5; #ifdef FFTW3 - while (y%7==0) y/=7; - // one multiplier of either 11 or 13 is allowed - if (y%11==0) y/=11; - else if (y%13==0) y/=13; + while (y%7==0) y/=7; + /* one multiplier of either 11 or 13 is allowed */ + if (y%11==0) y/=11; + else if (y%13==0) y/=13; #endif - if (y==1 && IS_EVEN(x) && x%divis==0) return(x); - x++; - } + if (y==1 && x%2==0 && x%divis==0) return(x); + x++; + } } -//============================================================= +/*=============================================================*/ static void fftInitBeforeD(const int lengthZ) -// initialize fft before initialization of Dmatrix + /* initialize fft before initialization of Dmatrix */ { #ifdef FFTW3 - int grXint=gridX,grYint=gridY,grZint=gridZ; // this is needed to provide 'int *' to grids - - planYf_Dm=fftw_plan_many_dft(1,&grYint,gridZ,slice_tr,NULL,1,gridY, - slice_tr,NULL,1,gridY,FFT_FORWARD,PLAN_FFTW_DM); - planZf_Dm=fftw_plan_many_dft(1,&grZint,gridY,slice,NULL,1,gridZ, - slice,NULL,1,gridZ,FFT_FORWARD,PLAN_FFTW_DM); - planXf_Dm=fftw_plan_many_dft(1,&grXint,lengthZ*D2sizeY,D2matrix,NULL,1,gridX, - D2matrix,NULL,1,gridX,FFT_FORWARD,PLAN_FFTW_DM); + planYf_Dm=fftw_plan_many_dft(1,(int *)&gridY,gridZ,slice_tr,NULL,1,gridY, + slice_tr,NULL,1,gridY,FFT_FORWARD,PLAN_FFTW_DM); + planZf_Dm=fftw_plan_many_dft(1,(int *)&gridZ,gridY,slice,NULL,1,gridZ, + slice,NULL,1,gridZ,FFT_FORWARD,PLAN_FFTW_DM); + planXf_Dm=fftw_plan_many_dft(1,(int *)&gridX,lengthZ*D2sizeY,D2matrix,NULL,1,gridX, + D2matrix,NULL,1,gridX,FFT_FORWARD,PLAN_FFTW_DM); #elif defined(FFT_TEMPERTON) - int size,nn; - - // allocate memory - MALLOC_VECTOR(trigsX,double,2*gridX,ALL); - MALLOC_VECTOR(trigsY,double,2*gridY,ALL); - MALLOC_VECTOR(trigsZ,double,2*gridZ,ALL); - size=MAX(gridX*D2sizeY,3*gridYZ); - MALLOC_VECTOR(work,double,2*size,ALL); - // initialize ifax and trigs - nn=gridX; - cftfax_ (&nn,ifaxX,trigsX); - nn=gridY; - cftfax_ (&nn,ifaxY,trigsY); - nn=gridZ; - cftfax_ (&nn,ifaxZ,trigsZ); + int size,nn; + + /* allocate memory */ + MALLOC_VECTOR(trigsX,double,2*gridX,ALL); + MALLOC_VECTOR(trigsY,double,2*gridY,ALL); + MALLOC_VECTOR(trigsZ,double,2*gridZ,ALL); + size=MAX(gridX*D2sizeY,3*gridYZ); + MALLOC_VECTOR(work,double,2*size,ALL); + /* initialize ifax and trigs */ + nn=gridX; + cftfax_ (&nn,ifaxX,trigsX); + nn=gridY; + cftfax_ (&nn,ifaxY,trigsY); + nn=gridZ; + cftfax_ (&nn,ifaxZ,trigsZ); #endif } -//============================================================ +/*============================================================*/ static void fftInitAfterD(void) -// second part of fft initialization + /* second part of fft initialization */ { #ifdef FFTW3 - int lot; - fftw_iodim dims,howmany_dims[2]; - int grYint=gridY; // this is needed to provide 'int *' to gridY -# ifdef PRECISE_TIMING - SYSTEM_TIME tvp[13]; -# endif - PRINTZ("Initializing FFTW3\n"); - FFLUSHZ(stdout); -# ifdef PRECISE_TIMING - GetTime(tvp); -# endif - lot=3*gridZ; - planYf=fftw_plan_many_dft(1,&grYint,lot,slices_tr,NULL,1,gridY, - slices_tr,NULL,1,gridY,FFT_FORWARD,PLAN_FFTW); -# ifdef PRECISE_TIMING - GetTime(tvp+1); -# endif - planYb=fftw_plan_many_dft(1,&grYint,lot,slices_tr,NULL,1,gridY, - slices_tr,NULL,1,gridY,FFT_BACKWARD,PLAN_FFTW); -# ifdef PRECISE_TIMING - GetTime(tvp+2); -# endif - dims.n=gridZ; - dims.is=dims.os=1; - howmany_dims[0].n=3; - howmany_dims[0].is=howmany_dims[0].os=gridZ*gridY; - howmany_dims[1].n=boxY; - howmany_dims[1].is=howmany_dims[1].os=gridZ; - planZf=fftw_plan_guru_dft(1,&dims,2,howmany_dims,slices,slices,FFT_FORWARD,PLAN_FFTW); -# ifdef PRECISE_TIMING - GetTime(tvp+3); -# endif - planZb=fftw_plan_guru_dft(1,&dims,2,howmany_dims,slices,slices,FFT_BACKWARD,PLAN_FFTW); -# ifdef PRECISE_TIMING - GetTime(tvp+4); -# endif - dims.n=gridX; - dims.is=dims.os=1; - howmany_dims[0].n=3*local_Nz; - howmany_dims[0].is=howmany_dims[0].os=smallY*gridX; - howmany_dims[1].n=boxY; - howmany_dims[1].is=howmany_dims[1].os=gridX; - planXf=fftw_plan_guru_dft(1,&dims,2,howmany_dims,Xmatrix,Xmatrix,FFT_FORWARD,PLAN_FFTW); -# ifdef PRECISE_TIMING - GetTime(tvp+5); -# endif - planXb=fftw_plan_guru_dft(1,&dims,2,howmany_dims,Xmatrix,Xmatrix,FFT_BACKWARD,PLAN_FFTW); -# ifdef PRECISE_TIMING - GetTime(tvp+6); - // print precise timing of FFT planning - SetTimerFreq(); - PRINTBOTHZ(logfile, - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" - " FFTW3 planning \n" - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" - "Yf = %4.4f Total = %4.4f\n" - "Yb = %4.4f\n" - "Zf = %4.4f\n" - "Zb = %4.4f\n" - "Xf = %4.4f\n" - "Xb = %4.4f\n\n", - DiffSec(tvp,tvp+1),DiffSec(tvp,tvp+6),DiffSec(tvp+1,tvp+2),DiffSec(tvp+2,tvp+3), - DiffSec(tvp+3,tvp+4),DiffSec(tvp+4,tvp+5),DiffSec(tvp+5,tvp+6)); -# endif - // destroy old plans - fftw_destroy_plan(planXf_Dm); - fftw_destroy_plan(planYf_Dm); - fftw_destroy_plan(planZf_Dm); + int lot; + fftw_iodim dims,howmany_dims[2]; +# ifdef PRECISE_TIMING + SYSTEM_TIME tvp[13]; +# endif + PRINTZ("Initializing FFTW3\n"); + FFLUSHZ(stdout); +# ifdef PRECISE_TIMING + GetTime(tvp); +# endif + lot=3*gridZ; + planYf=fftw_plan_many_dft(1,(int *)&gridY,lot,slices_tr,NULL,1,gridY, + slices_tr,NULL,1,gridY,FFT_FORWARD,PLAN_FFTW); +# ifdef PRECISE_TIMING + GetTime(tvp+1); +# endif + planYb=fftw_plan_many_dft(1,(int *)&gridY,lot,slices_tr,NULL,1,gridY, + slices_tr,NULL,1,gridY,FFT_BACKWARD,PLAN_FFTW); +# ifdef PRECISE_TIMING + GetTime(tvp+2); +# endif + dims.n=gridZ; + dims.is=dims.os=1; + howmany_dims[0].n=3; + howmany_dims[0].is=howmany_dims[0].os=gridZ*gridY; + howmany_dims[1].n=boxY; + howmany_dims[1].is=howmany_dims[1].os=gridZ; + planZf=fftw_plan_guru_dft(1,&dims,2,howmany_dims,slices,slices,FFT_FORWARD,PLAN_FFTW); +# ifdef PRECISE_TIMING + GetTime(tvp+3); +# endif + planZb=fftw_plan_guru_dft(1,&dims,2,howmany_dims,slices,slices,FFT_BACKWARD,PLAN_FFTW); +# ifdef PRECISE_TIMING + GetTime(tvp+4); +# endif + dims.n=gridX; + dims.is=dims.os=1; + howmany_dims[0].n=3*local_Nz; + howmany_dims[0].is=howmany_dims[0].os=smallY*gridX; + howmany_dims[1].n=boxY; + howmany_dims[1].is=howmany_dims[1].os=gridX; + planXf=fftw_plan_guru_dft(1,&dims,2,howmany_dims,Xmatrix,Xmatrix,FFT_FORWARD,PLAN_FFTW); +# ifdef PRECISE_TIMING + GetTime(tvp+5); +# endif + planXb=fftw_plan_guru_dft(1,&dims,2,howmany_dims,Xmatrix,Xmatrix,FFT_BACKWARD,PLAN_FFTW); +# ifdef PRECISE_TIMING + GetTime(tvp+6); + /* print precise timing of FFT planning */ + SetTimerFreq(); + PRINTBOTHZ(logfile, + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"\ + " FFTW3 planning \n"\ + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"\ + "Yf = %4.4f Total = %4.4f\n"\ + "Yb = %4.4f\n"\ + "Zf = %4.4f\n"\ + "Zb = %4.4f\n"\ + "Xf = %4.4f\n"\ + "Xb = %4.4f\n\n", + DiffSec(tvp,tvp+1),DiffSec(tvp,tvp+6),DiffSec(tvp+1,tvp+2),DiffSec(tvp+2,tvp+3), + DiffSec(tvp+3,tvp+4),DiffSec(tvp+4,tvp+5),DiffSec(tvp+5,tvp+6)); +# endif + /* destroy old plans */ + fftw_destroy_plan(planXf_Dm); + fftw_destroy_plan(planYf_Dm); + fftw_destroy_plan(planZf_Dm); #endif } -//============================================================ +/*============================================================*/ static void CalcInterTerm(int i,int j,int k,int mu,int nu,doublecomplex result) -/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} - * (in units of d), and component indices mu,nu. - */ + /* calculates interaction term between two dipoles; given integer distance vector {i,j,k} + (in units of d), and component indices mu,nu */ { - double rr,rtemp[3],qvec[3],q2[3],invr,invr3,qavec[3],av[3]; - double rr2,kr,kr2,kr3,kd2,q4,rn; - double temp,qmunu,qa,qamunu,invrn,invrn2,invrn3,invrn4,dmunu; - double kfr,ci,si,ci1,si1,ci2,si2,brd,cov,siv,g0,g2; - doublecomplex expval,br,br1,m,m2,Gf1,Gm0,Gm1,Gc1,Gc2; - int ind0,ind1,ind2,ind2m,ind3,ind4,indmunu; - int sigV[3],ic,sig,ivec[3],ord[3],invord[3]; - double t3q,t3a,t4q,t4a,t5tr,t5aa,t6tr,t6aa; - //int pr; - const int inter_avg=TRUE; - - // self interaction; self term is computed in different subroutine - if (i==0 && j==0 && k==0) { - result[RE]=result[IM]=0.0; - return; - } - - // for debugging - //pr=(i==1 && j==1 && k==1); - //if (pr) PRINTZ("%d,%d: ",mu,nu); - - // initialize rtemp - rtemp[0]=i*gridspace; - rtemp[1]=j*gridspace; - rtemp[2]=k*gridspace; - //====== calculate some basic constants ====== - rr2 = DotProd(rtemp,rtemp); - rr = sqrt(rr2); - rn=rr/gridspace; // normalized r - invr = 1/rr; - invr3 = invr*invr*invr; - MultScal(invr,rtemp,qvec); - kr = WaveNum * rr; - kr2 = kr*kr; - kfr=PI*rn; // k_F*r, for FCD - qmunu=qvec[mu]*qvec[nu]; - // cov=cos(kr); siv=sin(kr); expval=Exp(ikr)/r^3 - imExp(kr,expval); - cov=expval[RE]; - siv=expval[IM]; - cMultReal(invr3,expval,expval); - //====== calculate Gp ======== - // br=delta[mu,nu]*(-1+ikr+kr^2)-qmunu*(-3+3ikr+kr^2) - br[RE]=(3-kr2)*qmunu; - br[IM]=-3*kr*qmunu; - if(mu==nu) { - br[RE]+=kr2-1; - br[IM]+=kr; - } - // result=Gp=expval*br - cMult(br,expval,result); - //====== FCD (static and full) ======== - /* !!! speed of FCD can be improved by using faster version of sici routine, using predefined - * tables, etc (e.g. as is done in GSL library). But currently this do not seem to consume a - * significant portion of the total simulation time. - */ - if (IntRelation==G_FCD_ST) { - /* FCD is based on Gay-Balmaz P., Martin O.J.F. "A library for computing the filtered and - * non-filtered 3D Green's tensor associated with infinite homogeneous space and surfaces", - * Comp. Phys. Comm. 144:111-120 (2002), and Piller N.B. "Increasing the performance of the - * coupled-dipole approximation: A spectral approach", IEEE Trans.Ant.Propag. 46(8): - * 1126-1137. Here it differs by a factor of 4*pi*k^2. - */ - // result = Gp*[3*Si(k_F*r)+k_F*r*cos(k_F*r)-4*sin(k_F*r)]*2/(3*pi) - cisi(kfr,&ci,&si); - brd=TWO_OVER_PI*ONE_THIRD*(3*si+kfr*cos(kfr)-4*sin(kfr)); - cMultReal(brd,result,result); - } - else if (IntRelation==G_FCD) { - // ci,si_1,2 = ci,si_+,- = Ci,Si((k_F +,- k)r) - cisi(kfr+kr,&ci1,&si1); - cisi(kfr-kr,&ci2,&si2); - // ci=ci1-ci2; si=pi-si1-si2 - ci=ci1-ci2; - si=PI-si1-si2; - g0=INV_PI*(siv*ci+cov*si); - g2=INV_PI*(kr*(cov*ci-siv*si)+2*ONE_THIRD*(kfr*cos(kfr)-4*sin(kfr)))-g0; - temp=g0*kr2; - // brd=(delta[mu,nu]*(-g0*kr^2-g2)+qmunu*(g0*kr^2+3g2))/r^3 - brd=qmunu*(temp+3*g2); - if (mu==nu) brd-=temp+g2; - brd*=invr3; - // result=Gp+brd - result[RE]+=brd; - } - //======= second order corrections ======== - else if (IntRelation==G_SO) { - // !!! this should never happen - if (anisotropy) LogError(EC_ERROR,ONE_POS,"Incompatibility error in CalcInterTerm"); - kd2=kd*kd; - kr3=kr2*kr; - // only one refractive index can be used for FFT-compatible algorithm !!! - cEqual(ref_index[0],m); - cSquare(m,m2); - if (!inter_avg) { - qa=DotProd(qvec,prop); - // qamunu=qvec[mu]*prop[nu] + qvec[nu]*prop[mu] - qamunu=qvec[mu]*prop[nu]; - if (mu==nu) qamunu*=2; - else qamunu+=qvec[nu]*prop[mu]; - } - if (kr*rn < G_BOUND_CLOSE) { - //====== G close ============= - // check if inside the table bounds; needed to recompute to make an integer comparison - if ((i*i+j*j+k*k) > TAB_RMAX*TAB_RMAX) LogError(EC_ERROR,ALL_POS, - "Not enough table size (available only up to R/d=%d)",TAB_RMAX); - - // av is copy of propagation vector - if (!inter_avg) memcpy(av,prop,3*sizeof(double)); - ivec[0]=i; - ivec[1]=j; - ivec[2]=k; - // transformation of negative coordinates - for (ic=0;ic<3;ic++) { - if (ivec[ic]<0) { - sigV[ic]=-1; - av[ic]*=-1; - qvec[ic]*=-1; - ivec[ic]*=-1; - } - else sigV[ic]=1; - } - i=ivec[0]; - j=ivec[1]; - k=ivec[2]; - sig=sigV[mu]*sigV[nu]; // sign of some terms below - // transformation to case i>=j>=k>=0 - // building of ord; ord[x] is x-th largest coordinate (0-th - the largest) - if (i>=j) { - if (i>=k) { - ord[0]=0; - if (j>=k) { - ord[1]=1; - ord[2]=2; - } - else { - ord[1]=2; - ord[2]=1; - } - } - else { - ord[0]=2; - ord[1]=0; - ord[2]=1; - } - } - else { - if (i>=k) { - ord[0]=1; - ord[1]=0; - ord[2]=2; - } - else { - ord[2]=0; - if (j>=k) { - ord[0]=1; - ord[1]=2; - } - else { - ord[0]=2; - ord[1]=1; - } - } - } - // change parameters according to coordinate transforms - Permutate(qvec,ord); - if (!inter_avg) Permutate(av,ord); - Permutate_i(ivec,ord); - i=ivec[0]; - j=ivec[1]; - k=ivec[2]; - // compute inverse permutation - memcpy(invord,ord,3*sizeof(int)); - Permutate_i(invord,ord); - if (invord[0]==0 && invord[1]==1 && invord[2]==2) memcpy(invord,ord,3*sizeof(int)); - // compute transformed indices mu and nu - mu=invord[mu]; - nu=invord[nu]; - // indexes for tables of different dimensions - // indmunu is a number of component[mu,nu] in symmetric matrix - indmunu=mu+nu; - if (mu==2 || nu==2) indmunu++; - - ind0=tab_index[i][j]+k; - ind1=3*ind0; - ind2m=6*ind0; - ind2=ind2m+indmunu; - ind3=3*ind2; - ind4=6*ind2; - // computing several quantities with table integrals - t3q=DotProd(qvec,tab3+ind1); - t4q=DotProd(qvec,tab4+ind3); - t5tr=TrSym(tab5+ind2m); - t6tr=TrSym(tab6+ind4); - if (inter_avg) { - // =1/3*delta[mu,nu] - t5aa=ONE_THIRD*t5tr; - t6aa=ONE_THIRD*t6tr; - } - else { - t3a=DotProd(av,tab3+ind1); - t4a=DotProd(av,tab4+ind3); - t5aa=QuadForm(tab5+ind2m,av); - t6aa=QuadForm(tab6+ind4,av); - } - //====== computing Gc0 ===== - // temp = kr/24 - temp=kr/24; - /* br = delta[mu,nu]*(-I7-I9/2-kr*(i+kr)/24+2*t3q+t5tr) - * - (-3I8[mu,nu]-3I10[mu,nu]/2-qmunu*kr*(i+kr)/24+2*t4q+t6tr) - */ - br[RE]=sig*(3*(tab10[ind2]/2+tab8[ind2])-2*t4q-t6tr)+temp*qmunu*kr; - br[IM]=3*temp*qmunu; - if (mu==nu) { - br[RE]+=2*t3q+t5tr-temp*kr-tab9[ind0]/2-tab7[ind0]; - br[IM]-=temp; - } - // br*=kd^2 - cMultReal(kd2,br,br); - // br+=I1*delta[mu,nu]*(-1+ikr+kr^2)-sig*I2[mu,nu]*(-3+3ikr+kr^2) - br[RE]+=sig*tab2[ind2]*(3-kr2); - br[IM]-=sig*tab2[ind2]*3*kr; - if (mu==nu) { - br[RE]+=tab1[ind0]*(kr2-1); - br[IM]+=tab1[ind0]*kr; - } - // Gc0=expval*br - cMult(expval,br,result); - //==== computing Gc1 ====== - if (!inter_avg) { - // br=(kd*kr/24)*(qa*(delta[mu,nu]*(-2+ikr)-qmunu*(-6+ikr))-qamunu) - br[RE]=6*qmunu; - br[IM]=-kr*qmunu; - if (mu==nu) { - br[RE]-=2; - br[IM]+=kr; - } - cMultReal(qa,br,br); - br[RE]-=qamunu; - cMultReal(2*temp*kd,br,br); - // br1=(d/r)*(delta[mu,nu]*t3h*(-1+ikr)-sig*t4h*(-3+3ikr)) - br1[RE]=3*sig*t4a; - br1[IM]=-kr*br1[RE]; - if (mu==nu) { - br1[RE]-=t3a; - br1[IM]+=t3a*kr; - } - cMultReal(1/rn,br1,br1); - // Gc1=expval*i*m*kd*(br1+br) - cAdd(br,br1,Gc1); - cMultSelf(Gc1,m); - cMultReal(kd,Gc1,Gc1); - cMultSelf(Gc1,expval); - cMult_i(Gc1); - } - //==== computing Gc2 ====== - // br=delta[mu,nu]*t5aa-3*sig*t6aa-(kr/12)*(delta[mu,nu]*(i+kr)-qmunu*(3i+kr)) - br[RE]=-kr*qmunu; - br[IM]=-3*qmunu; - if (mu==nu) { - br[RE]+=kr; - br[IM]+=1; - } - cMultReal(-2*temp,br,br); - br[RE]-=3*sig*t6aa; - if (mu==nu) br[RE]+=t5aa; - // Gc2=expval*(kd^2/2)*m^2*br - cMult(m2,br,Gc2); - cMultReal(kd2/2,Gc2,Gc2); - cMultSelf(Gc2,expval); - // result = Gc0 + [ Gc1 ] + Gc2 - if (!inter_avg) cAdd(Gc2,Gc1,Gc2); - cAdd(Gc2,result,result); - } - else { - //====== Gfar (and part of Gmedian) ======= - // temp=kd^2/24 - temp=kd2/24; - // br=1-(1+m^2)*kd^2/24 - br[RE]=1-(1+m2[RE])*temp; - br[IM]=-m2[IM]*temp; - // Gf0 + Gf2 = Gp*br - cMultSelf(result,br); - //==== compute and add Gf1 === - if (!inter_avg) { - /* br = {delta[mu,nu]*(3-3ikr-2kr^2+ikr^3)-qmunu*(15-15ikr-6kr^2+ikr^3)}*qa - * + qamunu*(3-3ikr-kr^2) - */ - br[RE]=(6*kr2-15)*qmunu; - br[IM]=(15*kr-kr3)*qmunu; - if(mu==nu) { - br[RE]+=3-2*kr2; - br[IM]+=kr3-3*kr; - } - cMultReal(qa,br,br); - br[RE]+=(3-kr2)*qamunu; - br[IM]-=3*kr*qamunu; - // temp = kd^2/(12*kr) - temp*=2/kr; - // Gf1=expval*i*m*temp*br - cMult(m,br,Gf1); - cMultReal(temp,Gf1,Gf1); - cMultSelf(Gf1,expval); - cMult_i(Gf1); - // result = Gf - cAdd(Gf1,result,result); - } - if (kr < G_BOUND_MEDIAN) { - //===== G median ======== - vMult(qvec,qvec,q2); - q4=DotProd(q2,q2); - invrn=1/rn; - invrn2=invrn*invrn; - invrn3=invrn2*invrn; - invrn4=invrn2*invrn2; - // Gm0=expval*br*temp - temp=qmunu*(33*q4-7-12*(q2[mu]+q2[nu])); - if (mu == nu) temp+=(1-3*q4+4*q2[mu]); - temp*=7*invrn4/64; - br[RE]=-1; - br[IM]=kr; - cMultReal(temp,br,Gm0); - cMultSelf(Gm0,expval); - if (!inter_avg) { - // Gm1=expval*i*m*temp - vMult(qvec,prop,qavec); - if (mu == nu) dmunu=1; - else dmunu=0; - temp = 3*qa*(dmunu-7*qmunu) + 6*dmunu*qvec[mu]*prop[mu] - - 7*(dmunu-9*qmunu)*DotProd(qavec,q2) - + 3*(prop[mu]*qvec[nu]*(1-7*q2[mu])+prop[nu]*qvec[mu]*(1-7*q2[nu])); - temp*=kd*invrn3/48; - cMultReal(temp,m,Gm1); - cMult_i(Gm1); - cMultSelf(Gm1,expval); - // add Gm1 to Gm0 - cAdd(Gm0,Gm1,Gm0); - } - // result = Gf + Gm0 + [ Gm1 ] - cAdd(Gm0,result,result); - } - } - } - // if (pr) PRINTZ("%d,%d: %f+%fi\n",mu,nu,result[RE],result[IM]); + double rr,rtemp[3],qvec[3],q2[3],invr,invr3,qavec[3],av[3]; + double rr2,kr,kr2,kr3,kd2,q4,rn; + double temp,qmunu,qa,qamunu,invrn,invrn2,invrn3,invrn4,dmunu; + double kfr,ci,si,ci1,si1,ci2,si2,brd,cov,siv,g0,g2; + doublecomplex expval,br,br1,m,m2,Gf1,Gm0,Gm1,Gc1,Gc2; + int ind0,ind1,ind2,ind2m,ind3,ind4,indmunu; + int sigV[3],ic,sig,ivec[3],ord[3],invord[3]; + double t3q,t3a,t4q,t4a,t5tr,t5aa,t6tr,t6aa; +/* int pr; */ + const int inter_avg=TRUE; + + /* self interaction; self term is computed in different subroutine */ + if (i==0 && j==0 && k==0) { + result[RE]=result[IM]=0.0; + return; + } + +/* pr=(i==1 && j==1 && k==1); + if (pr) PRINTZ("%d,%d: ",mu,nu); /* for debugging */ + + /* initialize rtemp */ + rtemp[0]=i*gridspace; + rtemp[1]=j*gridspace; + rtemp[2]=k*gridspace; + /*====== calculate some basic constants ======*/ + rr2 = DotProd(rtemp,rtemp); + rr = sqrt(rr2); + rn=rr/gridspace; /* normalized r */ + invr = 1/rr; + invr3 = invr*invr*invr; + MultScal(invr,rtemp,qvec); + kr = WaveNum * rr; + kr2 = kr*kr; + kfr=PI*rn; /* k_F*r, for FCD */ + qmunu=qvec[mu]*qvec[nu]; + /* cov=cos(kr); siv=sin(kr); expval=Exp(ikr)/r^3 */ + imExp(kr,expval); + cov=expval[RE]; + siv=expval[IM]; + cMultReal(invr3,expval,expval); + /*====== calculate Gp ========*/ + /* br=delta[mu,nu]*(-1+ikr+kr^2)-qmunu*(-3+3ikr+kr^2) */ + br[RE]=(3-kr2)*qmunu; + br[IM]=-3*kr*qmunu; + if(mu==nu) { + br[RE]+=kr2-1; + br[IM]+=kr; + } + /* result=Gp=expval*br */ + cMult(br,expval,result); + /*====== FCD (static and full) ========*/ + /* !!! speed of FCD can be improved by using faster version of sici routine, using predefined + tables, etc (e.g. as is done in gsl library). But currently this do not seem to be significant + portion of the total simulation time */ + + if (IntRelation==G_FCD_ST) { + /* FCD is Based on + Gay-Balmaz P., Martin O.J.F. "A library for computing the filtered and non-filtered 3D + Green's tensor associated with infinite homogeneous space and surfaces", Comp. Phys. Comm. + 144:111-120 (2002), and + Piller N.B. "Increasing the performance of the coupled-dipole approximation: A spectral + approach", IEEE Trans.Ant.Propag. 46(8):1126-1137. + differing by a factor of 4*pi*k^2 */ + + /* result = Gp*[3*Si(k_F*r)+k_F*r*cos(k_F*r)-4*sin(k_F*r)]*2/(3*pi) */ + cisi(kfr,&ci,&si); + brd=TWO_OVER_PI*ONE_THIRD*(3*si+kfr*cos(kfr)-4*sin(kfr)); + cMultReal(brd,result,result); + } + else if (IntRelation==G_FCD) { + /* ci,si1,2=ci,si+-=Ci,Si((k_F+-k)r) */ + cisi(kfr+kr,&ci1,&si1); + cisi(kfr-kr,&ci2,&si2); + /* ci=ci1-ci2; si=pi-si1-si2 */ + ci=ci1-ci2; + si=PI-si1-si2; + g0=INV_PI*(siv*ci+cov*si); + g2=INV_PI*(kr*(cov*ci-siv*si)+2*ONE_THIRD*(kfr*cos(kfr)-4*sin(kfr)))-g0; + temp=g0*kr2; + /* brd=(delta[mu,nu]*(-g0*kr^2-g2)+qmunu*(g0*kr^2+3g2))/r^3 */ + brd=qmunu*(temp+3*g2); + if (mu==nu) brd-=temp+g2; + brd*=invr3; + /* result=Gp+brd */ + result[RE]+=brd; + } + /*======= second order corrections ========*/ + else if (IntRelation==G_SO) { + /* this should never happen !!! */ + if (anisotropy) LogError(EC_ERROR,ONE_POS,"Incompatibility error in CalcInterTerm"); + kd2=kd*kd; + kr3=kr2*kr; + /* only one refractive index can be used for FFT-compatible algorithm !!! */ + cEqual(ref_index[0],m); + cSquare(m,m2); + if (!inter_avg) { + qa=DotProd(qvec,prop); + /* qamunu=qvec[mu]*prop[nu] + qvec[nu]*prop[mu] */ + qamunu=qvec[mu]*prop[nu]; + if (mu==nu) qamunu*=2; + else qamunu+=qvec[nu]*prop[mu]; + } + if (kr*rn < G_BOUND_CLOSE) { + /*====== G close =============*/ + /* check if inside the table bounds; needed to recompute to make an integer comparison */ + if ((i*i+j*j+k*k) > TAB_RMAX*TAB_RMAX) LogError(EC_ERROR,ALL_POS, + "Not enough table size (available only up to R/d=%d)",TAB_RMAX); + + /* av is copy of propagation vector */ + if (!inter_avg) memcpy(av,prop,3*sizeof(double)); + ivec[0]=i; + ivec[1]=j; + ivec[2]=k; + /* transformation of negative coordinates */ + for (ic=0;ic<3;ic++) { + if (ivec[ic]<0) { + sigV[ic]=-1; + av[ic]*=-1; + qvec[ic]*=-1; + ivec[ic]*=-1; + } + else sigV[ic]=1; + } + i=ivec[0]; + j=ivec[1]; + k=ivec[2]; + sig=sigV[mu]*sigV[nu]; /* sign of some terms below */ + /* transformation to case i>=j>=k>=0 */ + /* building of ord; ord[x] is x-th largest coordinate (0-th - the largest) */ + if (i>=j) { + if (i>=k) { + ord[0]=0; + if (j>=k) { + ord[1]=1; + ord[2]=2; + } + else { + ord[1]=2; + ord[2]=1; + } + } + else { + ord[0]=2; + ord[1]=0; + ord[2]=1; + } + } + else { + if (i>=k) { + ord[0]=1; + ord[1]=0; + ord[2]=2; + } + else { + ord[2]=0; + if (j>=k) { + ord[0]=1; + ord[1]=2; + } + else { + ord[0]=2; + ord[1]=1; + } + } + } + /* change parameters according to coordinate transforms */ + Permutate(qvec,ord); + if (!inter_avg) Permutate(av,ord); + Permutate_i(ivec,ord); + i=ivec[0]; + j=ivec[1]; + k=ivec[2]; + /* compute inverse permutation */ + memcpy(invord,ord,3*sizeof(int)); + Permutate_i(invord,ord); + if (invord[0]==0 && invord[1]==1 && invord[2]==2) memcpy(invord,ord,3*sizeof(int)); + /* compute transformed indices mu and nu */ + mu=invord[mu]; + nu=invord[nu]; + /* indexes for tables of different dimensions */ + /* indmunu is a number of component[mu,nu] in symmetric matrix */ + indmunu=mu+nu; + if (mu==2 || nu==2) indmunu++; + + ind0=tab_index[i][j]+k; + ind1=3*ind0; + ind2m=6*ind0; + ind2=ind2m+indmunu; + ind3=3*ind2; + ind4=6*ind2; + /* computing several quantities with table integrals */ + t3q=DotProd(qvec,tab3+ind1); + t4q=DotProd(qvec,tab4+ind3); + t5tr=TrSym(tab5+ind2m); + t6tr=TrSym(tab6+ind4); + if (inter_avg) { + /* =1/3*delta[mu,nu] */ + t5aa=ONE_THIRD*t5tr; + t6aa=ONE_THIRD*t6tr; + } + else { + t3a=DotProd(av,tab3+ind1); + t4a=DotProd(av,tab4+ind3); + t5aa=QuadForm(tab5+ind2m,av); + t6aa=QuadForm(tab6+ind4,av); + } + /*====== computing Gc0 =====*/ + /* temp = kr/24 */ + temp=kr/24; + /* br=delta[mu,nu]*(-I7-I9/2-kr*(i+kr)/24+2*t3q+t5tr)- + (-3I8[mu,nu]-3I10[mu,nu]/2-qmunu*kr*(i+kr)/24+2*t4q+t6tr) */ + br[RE]=sig*(3*(tab10[ind2]/2+tab8[ind2])-2*t4q-t6tr)+temp*qmunu*kr; + br[IM]=3*temp*qmunu; + if (mu==nu) { + br[RE]+=2*t3q+t5tr-temp*kr-tab9[ind0]/2-tab7[ind0]; + br[IM]-=temp; + } + /* br*=kd^2 */ + cMultReal(kd2,br,br); + /* br+=I1*delta[mu,nu]*(-1+ikr+kr^2)-sig*I2[mu,nu]*(-3+3ikr+kr^2) */ + br[RE]+=sig*tab2[ind2]*(3-kr2); + br[IM]-=sig*tab2[ind2]*3*kr; + if (mu==nu) { + br[RE]+=tab1[ind0]*(kr2-1); + br[IM]+=tab1[ind0]*kr; + } + /* Gc0=expval*br */ + cMult(expval,br,result); + /*==== computing Gc1 ======*/ + if (!inter_avg) { + /* br=(kd*kr/24)*(qa*(delta[mu,nu]*(-2+ikr)-qmunu*(-6+ikr))-qamunu)*/ + br[RE]=6*qmunu; + br[IM]=-kr*qmunu; + if (mu==nu) { + br[RE]-=2; + br[IM]+=kr; + } + cMultReal(qa,br,br); + br[RE]-=qamunu; + cMultReal(2*temp*kd,br,br); + /* br1=(d/r)*(delta[mu,nu]*t3h*(-1+ikr)-sig*t4h*(-3+3ikr)) */ + br1[RE]=3*sig*t4a; + br1[IM]=-kr*br1[RE]; + if (mu==nu) { + br1[RE]-=t3a; + br1[IM]+=t3a*kr; + } + cMultReal(1/rn,br1,br1); + /* Gc1=expval*i*m*kd*(br1+br) */ + cAdd(br,br1,Gc1); + cMultSelf(Gc1,m); + cMultReal(kd,Gc1,Gc1); + cMultSelf(Gc1,expval); + cMult_i(Gc1); + } + /*==== computing Gc2 ======*/ + /* br=delta[mu,nu]*t5aa-3*sig*t6aa-(kr/12)*(delta[mu,nu]*(i+kr)-qmunu*(3i+kr)) */ + br[RE]=-kr*qmunu; + br[IM]=-3*qmunu; + if (mu==nu) { + br[RE]+=kr; + br[IM]+=1; + } + cMultReal(-2*temp,br,br); + br[RE]-=3*sig*t6aa; + if (mu==nu) br[RE]+=t5aa; + /* Gc2=expval*(kd^2/2)*m^2*br */ + cMult(m2,br,Gc2); + cMultReal(kd2/2,Gc2,Gc2); + cMultSelf(Gc2,expval); + /* result = Gc0 + [ Gc1 ] + Gc2 */ + if (!inter_avg) cAdd(Gc2,Gc1,Gc2); + cAdd(Gc2,result,result); + } + else { + /*====== Gfar (and part of Gmedian) =======*/ + /* temp=kd^2/24 */ + temp=kd2/24; + /* br=1-(1+m^2)*kd^2/24 */ + br[RE]=1-(1+m2[RE])*temp; + br[IM]=-m2[IM]*temp; + /* Gf0 + Gf2 = Gp*br */ + cMultSelf(result,br); + /*==== compute and add Gf1 ===*/ + if (!inter_avg) { + /* br={delta[mu,nu]*(3-3ikr-2kr^2+ikr^3)-qmunu*(15-15ikr-6kr^2+ikr^3)}*qa + +qamunu*(3-3ikr-kr^2) */ + br[RE]=(6*kr2-15)*qmunu; + br[IM]=(15*kr-kr3)*qmunu; + if(mu==nu) { + br[RE]+=3-2*kr2; + br[IM]+=kr3-3*kr; + } + cMultReal(qa,br,br); + br[RE]+=(3-kr2)*qamunu; + br[IM]-=3*kr*qamunu; + /* temp = kd^2/(12*kr) */ + temp*=2/kr; + /* Gf1=expval*i*m*temp*br */ + cMult(m,br,Gf1); + cMultReal(temp,Gf1,Gf1); + cMultSelf(Gf1,expval); + cMult_i(Gf1); + /* result = Gf */ + cAdd(Gf1,result,result); + } + if (kr < G_BOUND_MEDIAN) { + /*===== G median ========*/ + vMult(qvec,qvec,q2); + q4=DotProd(q2,q2); + invrn=1/rn; + invrn2=invrn*invrn; + invrn3=invrn2*invrn; + invrn4=invrn2*invrn2; + /* Gm0=expval*br*temp */ + temp=qmunu*(33*q4-7-12*(q2[mu]+q2[nu])); + if (mu == nu) temp+=(1-3*q4+4*q2[mu]); + temp*=7*invrn4/64; + br[RE]=-1; + br[IM]=kr; + cMultReal(temp,br,Gm0); + cMultSelf(Gm0,expval); + if (!inter_avg) { + /* Gm1=expval*i*m*temp */ + vMult(qvec,prop,qavec); + if (mu == nu) dmunu=1; + else dmunu=0; + temp=3*qa*(dmunu-7*qmunu)+6*dmunu*qvec[mu]*prop[mu]-7*(dmunu-9*qmunu)*DotProd(qavec,q2)+ + 3*(prop[mu]*qvec[nu]*(1-7*q2[mu])+prop[nu]*qvec[mu]*(1-7*q2[nu])); + temp*=kd*invrn3/48; + cMultReal(temp,m,Gm1); + cMult_i(Gm1); + cMultSelf(Gm1,expval); + /* add Gm1 to Gm0 */ + cAdd(Gm0,Gm1,Gm0); + } + /* result = Gf + Gm0 + [ Gm1 ]*/ + cAdd(Gm0,result,result); + } + } + } + /* if (pr) PRINTZ("%d,%d: %f+%fi\n",mu,nu,result[RE],result[IM]); */ } -//============================================================ +/*============================================================*/ void InitDmatrix(void) -/* Initializes the matrix D. D[i][j][k]=A[i1-i2][j1-j2][k1-k2]. Actually D=-FFT(G)/Ngrid. - * Then -G.x=invFFT(D*FFT(x)) for practical implementation of FFT such that invFFT(FFT(x))=Ngrid*x. - * G is exactly Green's tensor. The routine is called only once, so needs not to be very fast, - * however we tried to optimize it. - */ + /* initialises the matrix D. D[i][j][k]=A[i1-i2][j1-j2][k1-k2] + Actually D=-FFT(G)/Ngrid. Then -G.x=invFFT(D*FFT(x)) for practical + implementation of FFT such that invFFT(FFT(x))=Ngrid*x. G is exactly Green's tensor + The routine is called only once, so needs not to be very fast, however we tried + to optimize it. */ { - int i,j,k,kcor,Dcomp; - size_t x,y,z,indexfrom,indexto,ind,index,D2sizeTot; - double invNgrid,mem; - int nnn; // multiplier used for reduced_FFT or not reduced; 1 or 2 - int jstart, kstart; - size_t lengthN; - int mu, nu; // indices for interaction term - TIME_TYPE start,time1; + int i,j,k,kcor,Dcomp; + size_t x,y,z,indexfrom,indexto,ind,index,D2sizeTot; + double invNgrid,mem; + int nnn; /* multiplier used for reduced_FFT or not reduced; 1 or 2 */ + int jstart, kstart; + size_t lengthN; + int mu, nu; /* indices for interaction term */ + TIME_TYPE start,time1; #ifdef PARALLEL - size_t bufsize; + size_t bufsize; #endif #ifdef PRECISE_TIMING - // precise timing of the Dmatrix computation - SYSTEM_TIME tvp[13]; - SYSTEM_TIME Timing_fftX,Timing_fftY,Timing_fftZ,Timing_ar1,Timing_ar2,Timing_ar3, - Timing_BT,Timing_TYZ,Timing_beg; - double t_fftX,t_fftY,t_fftZ,t_ar1,t_ar2,t_ar3, - t_TYZ,t_beg,t_Arithm,t_FFT,t_BT; - - InitTime(&Timing_fftX); - InitTime(&Timing_fftY); - InitTime(&Timing_fftZ); - InitTime(&Timing_ar1); - InitTime(&Timing_ar2); - InitTime(&Timing_ar3); - InitTime(&Timing_BT); - InitTime(&Timing_TYZ); - GetTime(tvp); + /* precise timing of the Dmatrix computation */ + SYSTEM_TIME tvp[13]; + SYSTEM_TIME Timing_fftX,Timing_fftY,Timing_fftZ,Timing_ar1,Timing_ar2,Timing_ar3, + Timing_BT,Timing_TYZ,Timing_beg; + double t_fftX,t_fftY,t_fftZ,t_ar1,t_ar2,t_ar3, + t_TYZ,t_beg,t_Arithm,t_FFT,t_BT; + + InitTime(&Timing_fftX); + InitTime(&Timing_fftY); + InitTime(&Timing_fftZ); + InitTime(&Timing_ar1); + InitTime(&Timing_ar2); + InitTime(&Timing_ar3); + InitTime(&Timing_BT); + InitTime(&Timing_TYZ); + GetTime(tvp); #endif - start=GET_TIME(); - // initialize sizes of D and D2 matrices - D2sizeX=gridX; - if (reduced_FFT) { - D2sizeY=gridY/2; - D2sizeZ=gridZ/2; - DsizeY=gridY/2+1; - DsizeZ=gridZ/2+1; - nnn=1; - jstart=0; - kstart=0; - } - else { - D2sizeY=DsizeY=gridY; - D2sizeZ=DsizeZ=gridZ; - nnn=2; - jstart=1-boxY; - kstart=1-boxZ; - } - // auxiliary parameters - lengthN=nnn*local_Nz; - DsizeYZ=DsizeY*DsizeZ; - invNgrid=1.0/(gridX*((double)gridYZ)); - local_Nsmall=(gridX/2)*(gridYZ/(2*nprocs)); // size of X vector (for 1 component) - /* calculate size of matvec matrices (X,D,slices,slices_tr) and BT buffers (if parallel); - * uses complex expression to avoid overflows and enable prognosis for large grids - */ - mem = sizeof(doublecomplex)*(3*(2+(gridX/(4.0*nprocs)))*((double)gridYZ) - + NDCOMP*local_Nx*((double)DsizeYZ)); + start=GET_TIME(); + /* initialize sizes of D and D2 matrices */ + D2sizeX=gridX; + if (reduced_FFT) { + D2sizeY=gridY/2; + D2sizeZ=gridZ/2; + DsizeY=gridY/2+1; + DsizeZ=gridZ/2+1; + nnn=1; + jstart=0; + kstart=0; + } + else { + D2sizeY=DsizeY=gridY; + D2sizeZ=DsizeZ=gridZ; + nnn=2; + jstart=1-boxY; + kstart=1-boxZ; + } + /* auxiliary parameters */ + lengthN=nnn*local_Nz; + DsizeYZ=DsizeY*DsizeZ; + invNgrid=1.0/(gridX*((double)gridYZ)); + local_Nsmall=(gridX/2)*(gridYZ/(2*nprocs)); /* size of X vector (for 1 component) */ + /* calculate size of matvec matrices (X,D,slices,slices_tr) and BT buffers (if parallel) + uses complex expression to avoid overflows and enable prognose for large grids */ + mem=sizeof(doublecomplex)*(3*(2+(gridX/(4.0*nprocs)))*((double)gridYZ) + +NDCOMP*local_Nx*((double)DsizeYZ)); #ifdef PARALLEL - mem+=12*smallY*((double)(local_Nz*local_Nx))*sizeof(double); + mem+=12*smallY*((double)(local_Nz*local_Nx))*sizeof(double); #endif - // printout some information - /* conversions to (unsigned long) are needed (to remove warnings) because %z printf argument is not - * yet supported by all target compiler environmets - */ - FPRINTZ(logfile,"The FFT grid is: %lux%lux%lu\n",(unsigned long)gridX,(unsigned long)gridY, - (unsigned long)gridZ); + /* printout some information */ + FPRINTZ(logfile,"The FFT grid is: %ux%ux%u\n",gridX,gridY,gridZ); #ifdef PARALLEL - PRINTBOTHZ(logfile,"Memory usage for MatVec matrices (per processor): %.1f Mb\n",mem/MBYTE); + PRINTBOTHZ(logfile,"Memory usage for MatVec matrices (per processor): %.1f Mb\n",mem/MBYTE); #else - PRINTBOTHZ(logfile,"Memory usage for MatVec matrices: %.1f Mb\n",mem/MBYTE); + PRINTBOTHZ(logfile,"Memory usage for MatVec matrices: %.1f Mb\n",mem/MBYTE); #endif - FFLUSHZ(logfile); - memory+=mem; - if (prognose) return; - // allocate memory for Dmatrix - MALLOC_VECTOR(Dmatrix,complex,MultOverflow(NDCOMP*local_Nx,DsizeYZ,ONE_POS,"Dmatrix"),ALL); - // allocate memory for D2matrix components - D2sizeTot=nnn*local_Nz*D2sizeY*D2sizeX; - MALLOC_VECTOR(D2matrix,complex,D2sizeTot,ALL); - MALLOC_VECTOR(slice,complex,gridYZ,ALL); - MALLOC_VECTOR(slice_tr,complex,gridYZ,ALL); - // actually allocation of Xmatrix, slices, slices_tr is below after freeing of Dmatrix and its slice + FFLUSHZ(logfile); + memory+=mem; + if (prognose) return; + /* allocate memory for Dmatrix */ + MALLOC_VECTOR(Dmatrix,complex,MultOverflow(NDCOMP*local_Nx,DsizeYZ,ONE_POS,"Dmatrix"),ALL); + /* allocate memory for D2matrix components */ + D2sizeTot=nnn*local_Nz*D2sizeY*D2sizeX; + MALLOC_VECTOR(D2matrix,complex,D2sizeTot,ALL); + MALLOC_VECTOR(slice,complex,gridYZ,ALL); + MALLOC_VECTOR(slice_tr,complex,gridYZ,ALL); + /* actually allocation of Xmatrix, slices, slices_tr is below; + after freeing of Dmatrix and its slice */ #ifdef PARALLEL - // allocate buffer for BlockTranspose_Dm - bufsize = 2*lengthN*D2sizeY*local_Nx; - MALLOC_VECTOR(BT_buffer,double,bufsize,ALL); - MALLOC_VECTOR(BT_rbuffer,double,bufsize,ALL); + /* allocate buffer for BlockTranspose_Dm */ + bufsize = 2*lengthN*D2sizeY*local_Nx; + MALLOC_VECTOR(BT_buffer,double,bufsize,ALL); + MALLOC_VECTOR(BT_rbuffer,double,bufsize,ALL); #endif - D("Initialize FFT (1st part)"); - fftInitBeforeD(lengthN); + D("init FFT (1st part)"); + fftInitBeforeD(lengthN); #ifdef PRECISE_TIMING - GetTime(tvp+1); - elapsed(tvp,tvp+1,&Timing_beg); + GetTime(tvp+1); + elapsed(tvp,tvp+1,&Timing_beg); #endif - PRINTZ("Calculating Dmatrix"); - FFLUSHZ(stdout); + PRINTZ("Calculating Dmatrix"); + FFLUSHZ(stdout); - for(Dcomp=0;Dcomp(int)smallZ) kcor=k-gridZ; - else kcor=k; - for (j=jstart;j(int)smallZ) kcor=k-gridZ; + else kcor=k; + for (j=jstart;j #include #include -// the following is for MkDirErr +/* the following is for MkDirErr */ #include "os.h" #ifdef POSIX -# include -# include +# include +# include #endif #include "io.h" @@ -23,281 +23,286 @@ #include "vars.h" #include "memory.h" -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and initialized in param.c +/* defined and initialized in param.c */ extern const char logname[]; -// LOCAL VARIABLES +/* LOCAL VARIABLES */ - // error buffer for warning message generated before logfile is opened -static char warn_buf[MAX_MESSAGE2]=""; -//============================================================ +static char warn_buf[MAX_MESSAGE2]=""; /* error buffer for warning message generated before + logfile is opened */ +/*============================================================*/ void WrapLines(char *str) -/* wraps long lines in a string without breaking words; it replaces a number of spaces in string by - * '\n' characters; line width is determined by variable term_width. - */ + /* wraps long lines in a string without breaking words; it replaces a number of spaces in string + by '\n' characters; line width is determined by variable term_width */ { - char *left,*right,*mid,*end; - int divided; - - end=str+strlen(str); - left=str; - while (leftterm_width) { - divided=FALSE; - mid=left+term_width; - // search backward for space - while (mid>=left) { - if(mid[0]==' ') { - mid[0]='\n'; - left=mid+1; - divided=TRUE; - break; - } - mid--; - } - // if backward search failed (too long word), search forward for space - if (!divided) { - mid=left+term_width+1; - while (midterm_width) { + divided=FALSE; + mid=left+term_width; + /* search backward for space */ + while (mid>=left) { + if(mid[0]==' ') { + mid[0]='\n'; + left=mid+1; + divided=TRUE; + break; + } + mid--; + } + /* if backward search failed (too long word), search forward for space */ + if (!divided) { + mid=left+term_width+1; + while (mid // for file -#include "function.h" // for function attributes +#include /* for file */ +#include "function.h" /* for function attributes */ -/* File locking is made quite robust, however it is a complex operation that can cause unexpected - * behavior (permanent locks) especially when program is terminated externally (e.g. because of MPI - * failure). Moreover, it is not ANSI C, hence may have problems on some particular systems. - * Currently file locking functions are only in param.c - */ +/* file locking is made quite robust, however it is a complex operation that + can cause unexpected behaviour (permanent locks) especially when + program is terminated externally (e.g. because of MPI failure). + Moreover, it is not ANSI C, hence may have problems on some particular systems. + Currently file locking functions are only in param.c */ -//#define NOT_USE_LOCK // uncomment to disable file locking -//#define ONLY_LOCKFILE // uncomment to use only lock file, without file locking over NFS +/*#define NOT_USE_LOCK /* uncomment to disable file locking */ +/*#define ONLY_LOCKFILE /* uncomment to use only lock file, without file locking over NFS */ #ifndef NOT_USE_LOCK -# define USE_LOCK -# ifndef ONLY_LOCKFILE -# define LOCK_FOR_NFS // currently this works only for POSIX -# endif +# define USE_LOCK +# ifndef ONLY_LOCKFILE +# define LOCK_FOR_NFS /* currently this works only for POSIX */. +# endif #endif void WrapLines(char *str); @@ -34,10 +34,14 @@ void PrintError(const char *fmt, ... ) ATT_PRINTF(1,2) ATT_NORETURN; void LogPending(void); void PrintBoth(FILE *file,const char *fmt, ... ) ATT_PRINTF(2,3); -FILE *FOpenErr(const char *fname,const char *mode,int who,const char *err_fname, - int lineN) ATT_MALLOC; +FILE *FOpenErr(const char *fname,const char *mode,int who, + const char *err_fname,int lineN) ATT_MALLOC; void FCloseErr(FILE *file,const char *fname,int who,const char *err_fname,int lineN); void RemoveErr(const char *fname,int who,const char *err_fname,int lineN); void MkDirErr(const char *dirname,int who,const char *err_fname,int lineN); -#endif // __io_h +#endif /* __io_h */ + + + + diff --git a/src/iterative.c b/src/iterative.c index 72854fb2..ab1c6da8 100644 --- a/src/iterative.c +++ b/src/iterative.c @@ -28,7 +28,7 @@ * This code is covered by the GNU General Public License. */ #include -#include // for time_t & time +#include /* for time_t & time */ #include #include #include "vars.h" @@ -39,700 +39,696 @@ #include "io.h" #include "timing.h" #include "function.h" -#include "debug.h" -// maximum allowed iterations without residual decrease +/* maximum allowed iterations without residual decrease */ #define MAXCOUNT_CGNR 10 #define MAXCOUNT_BICGSTAB 30000 #define MAXCOUNT_BICG_CS 50000 #define MAXCOUNT_QMR_CS 50000 -// zero value for checks -#define EPS_BICGSTAB1 1E-16 // for (r~.r)/(r.r) -#define EPS_BICGSTAB2 1E-10 // for 1/|beta_k| -#define EPS_BICG_CS1 1E-10 // for (rT.r)/(r.r) -#define EPS_BICG_CS2 1E-10 // for (pT.A.p)/(rT.r) -#define EPS_QMR_CS1 1E-10 // for (vT.v)/(r.r) -#define EPS_QMR_CS2 1E-40 // for overflow of exponent number +/* zero value for checks */ +#define EPS_BICGSTAB 1E-8 +#define EPS_BICG_CS 1E-8 +#define EPS_QMR_CS 1E-8 +#define EPS_QMR_CS_1 1E-40 /* problem can only occur if overflow of exponent number */ -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and initialized in CalculateE.c +/* defined and initialized in CalculateE.c */ extern const TIME_TYPE tstart_CE; -// defined and initialized in calculator.c +/* defined and initialized in calculator.c */ extern doublecomplex *rvec,*vec1,*vec2,*vec3,*Avecbuffer; -// defined and initialized in param.c +/* defined and initialized in param.c */ extern const double eps; -// defined and initialized in timing.c +/* defined and initialized in timing.c */ extern TIME_TYPE Timing_OneIter,Timing_InitIter,Timing_InitIter_comm; extern unsigned long TotalIter; -// LOCAL VARIABLES - -static double inprodR; // used as r_0 (and main residual) in each method -static double epsB; // stopping criterion -static double resid_scale; // scale to get square of relative error -static double prev_err; /* previous relative error; used in ProgressReport, initialized in - * IterativeSolver - */ -static int method; // iteration method -static int count; // iteration count -static int counter; // number of successive iterations without residual decrease -static int max_count; // maximum allowed value of counter -static int chp_exit; // checkpoint occurred - exit -static int chp_skip; // skip checkpoint, even if it is time to do -typedef struct // data for checkpoints +/* LOCAL VARIABLES */ + +static double inprodR; /* uses as r_0 (and main residual) in each method */ +static double epsB; /* stopping criterion */ +static double resid_scale; /* scale to get square of relative error */ +static double prev_err; /* previous Rel.Error; used in ProgressReport, + initilized in IterativeSolver */ +static int method; /* iteration method */ +static int count; /* iteration count */ +static int counter; /* number of successive iterations without residual decrease */ +static int max_count; /* maximum allowed value of counter */ +static int chp_exit; /* checkpoint occured - exit */ +static int chp_skip; /* skip checkpoint, even if it is time to do */ +typedef struct /* data for checkpoints */ { - void *ptr; // pointer to the data - int size; // size of one element + void *ptr; /* pointer to the data */ + int size; /* size of one element */ } chp_data; -typedef struct // structure to hold information about different scalars and vectors +typedef struct /* structure to hold information about different scalars and vectors */ { - chp_data *sc; // array of scalar data - int sc_N; // number of scalars - chp_data *vec; // array of vector data - int vec_N; // number of vectors + chp_data *sc; /* array of scalar data */ + int sc_N; /* number of scalars */ + chp_data *vec; /* array of vector data */ + int vec_N; /* number of vectors */ } iter_data_type; -static iter_data_type iter_data; // actually the structure +static iter_data_type iter_data; /* actually the structure */ -// EXTERNAL FUNCTIONS +/* EXTERNAL FUNCTIONS */ -// matvec.c +/* matvec.c */ void MatVec(doublecomplex *in,doublecomplex *out,double *inprod,int her); -//============================================================ +/*============================================================*/ INLINE void SwapPointers(doublecomplex **a,doublecomplex **b) -/* swap two pointers of (doublecomplex *) type; should work for others but will give - * "Suspicious pointer conversion" warning. - */ + /* swap two pointers of (doublecomplex *) type; + should work for others but will give "Suspisious pointer conversion" warning */ { - doublecomplex *tmp; + doublecomplex *tmp; - tmp=*a; - *a=*b; - *b=tmp; + tmp=*a; + *a=*b; + *b=tmp; } -//============================================================ +/*============================================================*/ static void SaveIterChpoint(void) -/* save a binary checkpoint; only limitedly foolproof - user should take care to load checkpoints - * on the same machine (number of processors) and with the same command line. - */ + /* save a binary checkpoint; + only limitedly foolproof - user should take care to load checkpoints + on the same machine (number of processors) and with the same command line */ { - int i; - char fname[MAX_FNAME]; - FILE *chp_file; - TIME_TYPE tstart; - - tstart=GET_TIME(); - if (ringid==ROOT) { - // create directory "chp_dir" if needed and open info file - sprintf(fname,"%s/" F_CHP_LOG,chp_dir); - if ((chp_file=fopen(fname,"w"))==NULL) { - MkDirErr(chp_dir,ONE_POS); - chp_file=FOpenErr(fname,"w",ONE_POS); - } - // write info and close file - fprintf(chp_file, - "Info about the run, which produced the checkpoint, can be found in ../%s",directory); - FCloseErr(chp_file,fname,ONE_POS); - } - // wait to ensure that directory exists - Synchronize(); - // open output file; writing errors are checked only for vectors - sprintf(fname,"%s/" F_CHP,chp_dir,ringid); - chp_file=FOpenErr(fname,"wb",ALL_POS); - // write common scalars - fwrite(&method,sizeof(int),1,chp_file); - fwrite(&nlocalRows,sizeof(size_t),1,chp_file); - fwrite(&count,sizeof(int),1,chp_file); - fwrite(&counter,sizeof(int),1,chp_file); - fwrite(&inprodR,sizeof(double),1,chp_file); - fwrite(&prev_err,sizeof(double),1,chp_file); // written on ALL processors but used only on ROOT - fwrite(&resid_scale,sizeof(double),1,chp_file); - // write specific scalars - for (i=0;i0) strcpy(temp,"-+"); - else strcpy(temp,"- "); - sprintf(progr_string,"RE_%03d = %.10E %s",count,err,temp); - if (!orient_avg) { - fprintf(logfile,"%s progress = %.6f\n",progr_string,progr); - fflush(logfile); - } - printf("%s\n",progr_string); - fflush(stdout); - - prev_err=err; - } - count++; - TotalIter++; - // check condition for checkpoint; checkpoint is saved at first time - if (chp_type!=CHP_NONE && chp_time!=UNDEF && !chp_skip) { - time(&wt); - elapsed=difftime(wt,last_chp_wt); - if (chp_time0) strcpy(temp,"-+"); + else strcpy(temp,"- "); + sprintf(progr_string,"RE_%03d = %.10E %s",count,err,temp); + if (!orient_avg) { + fprintf(logfile,"%s progress = %.6f\n",progr_string,progr); + fflush(logfile); + } + printf("%s\n",progr_string); + fflush(stdout); + + prev_err=err; + } + count++; + TotalIter++; + + /* check condition for checkpoint; checkpoint is saved at first time */ + if (chp_type!=CHP_NONE && chp_time!=UNDEF && !chp_skip) { + time(&wt); + elapsed=difftime(wt,last_chp_wt); + if (chp_time=epsB && count<=maxiter && counter<=max_count && !chp_exit) { - Timing_OneIterComm=0; // initialize time - tstart=GET_TIME(); - // p_1=Ah.r_0 and ro_new=ro_0=|Ah.r_0|^2 - if (count==1) MatVec(rvec,pvec,&ro_new,TRUE); - else { - // Avecbuffer=AH.r_k-1, ro_new=ro_k-1=|AH.r_k-1|^2 - MatVec(rvec,Avecbuffer,&ro_new,TRUE); - // beta_k-1=ro_k-1/ro_k-2 - beta=ro_new/ro_old; - // p_k=beta_k-1*p_k-1+AH.r_k-1 - nIncrem10(pvec,Avecbuffer,beta,NULL,&Timing_OneIterComm); - } - // alpha_k=ro_k-1/|A.p_k|^2 - // Avecbuffer=A.p_k - MatVec(pvec,Avecbuffer,&denumeratorAlpha,FALSE); - alpha=ro_new/denumeratorAlpha; - // x_k=x_k-1+alpha_k*p_k - nIncrem01(xvec,pvec,alpha,NULL,&Timing_OneIterComm); - // r_k=r_k-1-alpha_k*A.p_k and |r_k|^2 - nIncrem01(rvec,Avecbuffer,-alpha,&inprodRplus1,&Timing_OneIterComm); - // initialize ro_old -> ro_k-2 for next iteration - ro_old=ro_new; - - Timing_OneIter=GET_TIME()-tstart; - // check progress - ProgressReport(inprodRplus1); - } // end of the big while loop - AfterIterFinished(); + double inprodRplus1; /* inner product of rk+1 */ + double alpha, denumeratorAlpha; + double beta,ro_new,ro_old=0; /* initialization to remove compiler warning */ + TIME_TYPE tstart; + chp_data scalars[1]; + + max_count=mc; + /* initialize data structure for checkpoints */ + scalars[0].ptr=&ro_old; + scalars[0].size=sizeof(double); + iter_data.sc=scalars; + iter_data.sc_N=1; + iter_data.vec=NULL; + iter_data.vec_N=0; + /* initialization of constants and vectors */ + if (load_chpoint) LoadIterChpoint(); + Timing_InitIter = GET_TIME() - tstart_CE; /* initialization complete */ + /* main iteration cycle */ + while (inprodR>=epsB && count<=maxiter && counter<=max_count && !chp_exit) { + Timing_OneIterComm=0; /* initialize time */ + tstart=GET_TIME(); + /* p_1=Ah.r_0 and ro_new=ro_0=|Ah.r_0|^2 */ + if (count==1) MatVec(rvec,pvec,&ro_new,TRUE); + else { + /* Avecbuffer=AH.r_k-1, ro_new=ro_k-1=|AH.r_k-1|^2 */ + MatVec(rvec,Avecbuffer,&ro_new,TRUE); + /* beta_k-1=ro_k-1/ro_k-2 */ + beta=ro_new/ro_old; + /* p_k=beta_k-1*p_k-1+AH.r_k-1 */ + nIncrem10(pvec,Avecbuffer,beta,NULL,&Timing_OneIterComm); + } + /* alpha_k=ro_k-1/|A.p_k|^2 */ + /* Avecbuffer=A.p_k */ + MatVec(pvec,Avecbuffer,&denumeratorAlpha,FALSE); + alpha=ro_new/denumeratorAlpha; + /* x_k=x_k-1+alpha_k*p_k */ + nIncrem01(xvec,pvec,alpha,NULL,&Timing_OneIterComm); + /* r_k=r_k-1-alpha_k*A.p_k and |r_k|^2 */ + nIncrem01(rvec,Avecbuffer,-alpha,&inprodRplus1,&Timing_OneIterComm); + /* initialize ro_old -> ro_k-2 for next iteration */ + ro_old=ro_new; + + Timing_OneIter=GET_TIME()-tstart; + /* check progress */ + ProgressReport(inprodRplus1); + } /* end of the big while loop */ + AfterIterFinished(); } -//============================================================ +/*============================================================*/ static void BiCGStab(const int mc) -// Bi-Conjugate Gradient Stabilized + /* Bi-Conjugate Gradient Stabilized */ { - double inprodRplus1; // inner product of r_k+1 - double denumOmega,dtmp; - doublecomplex beta,ro_new,ro_old,omega,alpha,temp1,temp2; - doublecomplex *v,*s,*rtilda; - TIME_TYPE tstart; - chp_data scalars[3],vectors[3]; - - max_count=mc; - // rename some vectors - v=vec1; - s=vec2; - rtilda=vec3; - // initialize data structure for checkpoints - scalars[0].ptr=&ro_old; - scalars[1].ptr=ω - scalars[2].ptr=α - scalars[0].size=scalars[1].size=scalars[2].size=sizeof(doublecomplex); - vectors[0].ptr=v; - vectors[1].ptr=s; - vectors[2].ptr=rtilda; - vectors[0].size=vectors[1].size=vectors[2].size=sizeof(doublecomplex); - iter_data.sc=scalars; - iter_data.sc_N=3; - iter_data.vec=vectors; - iter_data.vec_N=3; - // initialization of constants and vectors - if (load_chpoint) LoadIterChpoint(); - else nCopy(rtilda,rvec); // r~=r_0 - Timing_InitIter=GET_TIME()-tstart_CE; // initialization complete - // main iteration cycle - while (inprodR>=epsB && count<=maxiter && counter<=max_count && !chp_exit) { - Timing_OneIterComm = 0; // initialize time - tstart = GET_TIME(); - // ro_k-1=r_k-1.r~ ; check for ro_k-1!=0 - nDotProd(rvec,rtilda,ro_new,&Timing_OneIterComm); - dtmp=cAbs(ro_new)/inprodR; - D2z("(r~.r)/(r.r)=%.2g",dtmp); - if (dtmp ro_k-2 for next iteration - cEqual(ro_new,ro_old); - /* take time stamp here, not to measure time of incomplete iteration - * (interrupted at the check above). - */ - Timing_OneIter=GET_TIME()-tstart; - } - // check progress - ProgressReport(inprodRplus1); - } // end of the big while loop - AfterIterFinished(); + double inprodRplus1; /* inner product of rk+1 */ + double denumOmega,dtmp; + doublecomplex beta,ro_new,ro_old,omega,alpha,temp1,temp2; + doublecomplex *v,*s,*rtilda; + TIME_TYPE tstart; + chp_data scalars[3],vectors[3]; + + max_count=mc; + /* rename some vectors */ + v=vec1; + s=vec2; + rtilda=vec3; + /* initialize data structure for checkpoints */ + scalars[0].ptr=&ro_old; + scalars[1].ptr=ω + scalars[2].ptr=α + scalars[0].size=scalars[1].size=scalars[2].size=sizeof(doublecomplex); + vectors[0].ptr=v; + vectors[1].ptr=s; + vectors[2].ptr=rtilda; + vectors[0].size=vectors[1].size=vectors[2].size=sizeof(doublecomplex); + iter_data.sc=scalars; + iter_data.sc_N=3; + iter_data.vec=vectors; + iter_data.vec_N=3; + /* initialization of constants and vectors */ + if (load_chpoint) LoadIterChpoint(); + else nCopy(rtilda,rvec); /* r~=r_0 */ + Timing_InitIter=GET_TIME()-tstart_CE; /* initialization complete */ + /* main iteration cycle */ + while (inprodR>=epsB && count<=maxiter && counter<=max_count && !chp_exit) { + Timing_OneIterComm = 0; /* initialize time */ + tstart = GET_TIME(); + /* ro_k-1=r_k-1.r~ ; check for ro_k-1!=0 */ + nDotProd(rvec,rtilda,ro_new,&Timing_OneIterComm); + dtmp=cAbs(ro_new)/inprodR; + if (dtmp ro_k-2 for next iteration */ + cEqual(ro_new,ro_old); + /* take time stamp here, not to measure time of incomplete iteration + (interrupted at the check above */ + Timing_OneIter=GET_TIME()-tstart; + } + /* check progress */ + ProgressReport(inprodRplus1); + } /* end of the big while loop */ + AfterIterFinished(); } -//============================================================ +/*============================================================*/ static void BiCG_CS(const int mc) -// Bi-Conjugate Gradient for Complex Symmetric systems + /* Bi-Conjugate Gradient for Complex Symmetric systems */ { - double inprodRplus1; // inner product of r_k+1 - doublecomplex alpha, mu; - doublecomplex beta,ro_new,ro_old,temp; - double dtmp,abs_ro_new; - TIME_TYPE tstart; - chp_data scalars[1]; - - max_count=mc; - // initialize data structure for checkpoints - scalars[0].ptr=&ro_old; - scalars[0].size=sizeof(doublecomplex); - iter_data.sc=scalars; - iter_data.sc_N=1; - iter_data.vec=NULL; - iter_data.vec_N=0; - // initialization of constants and vectors - if (load_chpoint) LoadIterChpoint(); - Timing_InitIter = GET_TIME() - tstart_CE; // initialization complete - // main iteration cycle - while (inprodR>=epsB && count<=maxiter && counter<=max_count && !chp_exit) { - Timing_OneIterComm=0; // initialize time - tstart=GET_TIME(); - // ro_k-1=r_k-1(*).r_k-1; check for ro_k-1!=0 - nDotProdSelf_conj(rvec,ro_new,&Timing_OneIterComm); - abs_ro_new=cAbs(ro_new); - dtmp=abs_ro_new/inprodR; - D2z("(rT.r)/(r.r)=%.2g",dtmp); - if (dtmp ro_k-2 for next iteration - cEqual(ro_new,ro_old); - Timing_OneIter=GET_TIME()-tstart; - // check progress - ProgressReport(inprodRplus1); - } // end of the big while loop - AfterIterFinished(); + double inprodRplus1; /* inner product of rk+1 */ + doublecomplex alpha, mu; + doublecomplex beta,ro_new,ro_old,temp; + double dtmp,abs_ro_new; + TIME_TYPE tstart; + chp_data scalars[1]; + + max_count=mc; + /* initialize data structure for checkpoints */ + scalars[0].ptr=&ro_old; + scalars[0].size=sizeof(doublecomplex); + iter_data.sc=scalars; + iter_data.sc_N=1; + iter_data.vec=NULL; + iter_data.vec_N=0; + /* initialization of constants and vectors */ + if (load_chpoint) LoadIterChpoint(); + Timing_InitIter = GET_TIME() - tstart_CE; /* initialization complete */ + /* main iteration cycle */ + while (inprodR>=epsB && count<=maxiter && counter<=max_count && !chp_exit) { + Timing_OneIterComm=0; /* initialize time */ + tstart=GET_TIME(); + /* ro_k-1=r_k-1(*).r_k-1; check for ro_k-1!=0 */ + nDotProdSelf_conj(rvec,ro_new,&Timing_OneIterComm); + abs_ro_new=cAbs(ro_new); + dtmp=abs_ro_new/inprodR; + if (dtmp ro_k-2 for next iteration */ + cEqual(ro_new,ro_old); + + Timing_OneIter=GET_TIME()-tstart; + /* check progress */ + ProgressReport(inprodRplus1); + } /* end of the big while loop */ + AfterIterFinished(); } -//============================================================ +/*============================================================*/ static void QMR_CS(const int mc) -// Quasi Minimum Residual for Complex Symmetric systems + /* Quasi Minimum Residual for Complex Symmetric systems */ { - double inprodRplus1; // inner product of r_k+1 - double c_old,c_new,omega_old,omega_new,zetaabs,dtmp1,dtmp2; - doublecomplex alpha,beta,theta,eta,zeta,zetatilda,tau,tautilda; - doublecomplex s_new,s_old,temp1,temp2,temp3,temp4; - doublecomplex *v,*vtilda,*p_new,*p_old; - TIME_TYPE tstart; - chp_data scalars[8],vectors[3]; - - max_count=mc; - // rename some vectors - v=vec1; // v_k - vtilda=vec2; // also v_k-1 - p_new=pvec; // p_k - p_old=vec3; // p_k-1 - // initialize data structure for checkpoints - scalars[0].ptr=&omega_old; - scalars[1].ptr=&omega_new; - scalars[2].ptr=&c_old; - scalars[3].ptr=&c_new; - scalars[4].ptr=β - scalars[5].ptr=&tautilda; - scalars[6].ptr=&s_old; - scalars[7].ptr=&s_new; - scalars[0].size=scalars[1].size=scalars[2].size=scalars[3].size=sizeof(double); - scalars[4].size=scalars[5].size=scalars[6].size=scalars[7].size=sizeof(doublecomplex); - vectors[0].ptr=v; - vectors[1].ptr=vtilda; - vectors[2].ptr=p_old; - vectors[0].size=vectors[1].size=vectors[2].size=sizeof(doublecomplex); - iter_data.sc=scalars; - iter_data.sc_N=8; - iter_data.vec=vectors; - iter_data.vec_N=3; - // initialization of constants and vectors - if (load_chpoint) { - LoadIterChpoint(); - // change pointers names according to count parity - if (IS_EVEN(count)) SwapPointers(&v,&vtilda); - else SwapPointers(&p_old,&p_new); - } - else { - // omega_0=||v_0||=0 - omega_old=0.0; - // beta_1=sqrt(v~_1(*).v~_1); omega_1=||v~_1||/|beta_1|; (v~_1=r_0) - nDotProdSelf_conj(rvec,temp1,&Timing_InitIter_comm); - cSqrt(temp1,beta); - omega_new=sqrt(inprodR)/cAbs(beta); // inprodR=nNorm2(r_0) - // v_1=v~_1/beta_1 - cInv(beta,temp1); - nMult_cmplx(v,rvec,temp1); - // tau~_1=omega_1*beta_1 - cMultReal(omega_new,beta,tautilda); - // c_0=c_-1=1; s_0=s_-1=0 - c_new=c_old=1.0; - s_new[RE]=s_new[IM]=s_old[RE]=s_old[IM]=0.0; - } - Timing_InitIter = GET_TIME() - tstart_CE; // initialization complete - // main iteration cycle - while (inprodR>=epsB && count<=maxiter && counter<=max_count && !chp_exit) { - Timing_OneIterComm=0; // initialize time - tstart=GET_TIME(); - // check for zero beta - dtmp1=cAbs2(beta)/inprodR; - D2z("(vT.v)/(r.r)=%.2g",dtmp1); - if (dtmp1=epsB && count<=maxiter && counter<=max_count && !chp_exit) { + Timing_OneIterComm=0; /* initialize time */ + tstart=GET_TIME(); + /* check for zero beta */ + dtmp1=cAbs2(beta)/inprodR; + if (dtmp1maxiter) LogError(EC_ERROR,ONE_POS, - "Iterations haven't converged in maximum allowed number of iterations (%d)",maxiter); - else if (counter>max_count) LogError(EC_ERROR,ONE_POS, - "Residual norm haven't decreased for maximum allowed number of iterations (%d)",max_count); - // post-processing - /* x is a solution of a modified system, not exactly internal field; should not be used further - * except for adaptive technique (as starting vector for next system) - */ - nMult_mat(pvec,xvec,cc_sqrt); /* p is now vector of polarizations. Can be used to calculate - * e.g. scattered field faster. - */ - if (chp_exit) return CHP_EXIT; // check if exiting after checkpoint - return count; + double temp; + char tmp_str[MAX_LINE]; + + method=method_in; + chp_exit=FALSE; + chp_skip=FALSE; + /* instead of solving system (I+D.C).x=b , C - diagonal matrix with couple constants + * D - symmetric interaction matrix of Green's tensor + * we solve system (I+S.D.S).(S.x)=(S.b), S=sqrt(C), them + * total interaction matrix is symmetric and Jacobi-preconditioned for any discribution of m */ + + /* p=b=(S.Einc) is right part of the linear system; used only here, + in iteration methods themselves p is completely different vector */ + if (!load_chpoint) { + nMult_mat(pvec,Einc,cc_sqrt); + + temp=nNorm2(pvec,&Timing_InitIter_comm); /* |r_0|^2 when x_0=0 */ + resid_scale=1/temp; + /* calculate A.(x_0=b), r_0=b-A.(x_0=b) and |r_0|^2 */ + MatVec(pvec,Avecbuffer,NULL,FALSE); + nSubtr(rvec,pvec,Avecbuffer,&inprodR,&Timing_InitIter_comm); + /* check which x_0 is better */ + if (tempmaxiter) LogError(EC_ERROR,ONE_POS, + "Iterations haven't converged in maximum allowed number of iterations (%d)",maxiter); + else if (counter>max_count) LogError(EC_ERROR,ONE_POS, + "Residual norm haven't decreased for maximum allowed number of iterations (%d)",max_count); + /* postprocessing */ + /* x is a solution of a modified system, not exactly internal field + should not be used further except fot adaptive technique + (as starting vector for next system) */ + nMult_mat(pvec,xvec,cc_sqrt); /* p is now vector of polarizations - */ + /* faster to calculate ,e.g. scattered field */ + /* check if exiting after checkpoint */ + if (chp_exit) return CHP_EXIT; + return count; } diff --git a/src/linalg.c b/src/linalg.c index 04a51eeb..946b4df1 100644 --- a/src/linalg.c +++ b/src/linalg.c @@ -8,7 +8,7 @@ * to be a principal limitation of C standard (some compilers may work, some produce * warnings) * - * Copyright (C) 2006-2008 University of Amsterdam + * Copyright (C) 2006-2007 University of Amsterdam * This code is covered by the GNU General Public License. */ #include @@ -17,492 +17,494 @@ #include "comm.h" #include "linalg.h" -//============================================================ +/*============================================================*/ void nInit(doublecomplex *a) -// initialize vector a with null values + /* initialize vector a with null values */ { - size_t i; + size_t i; #pragma loop count (100000) #pragma ivdep - for (i=0;i $@.$$$$; \ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ rm -f $@.$$$$ diff --git a/src/make_particle.c b/src/make_particle.c index e49c387a..62304aa7 100644 --- a/src/make_particle.c +++ b/src/make_particle.c @@ -16,8 +16,6 @@ * (not used now) * ----------------------------------------------------------- * Shapes 'capsule' and 'egg' are implemented by Daniel Hahn and Richard Joseph. - * ----------------------------------------------------------- - * Shape 'axisymmetric' is based on the code by Konstantin Gilev * * Currently is developed by Maxim Yurkin * @@ -27,9 +25,8 @@ #include #include #include -#include // for time and clock (used for random seed) +#include /* for time and clock (used for random seed) */ #include -#include #include "vars.h" #include "const.h" #include "cmplx.h" @@ -42,16 +39,16 @@ #include "timing.h" #include "mt19937ar.h" -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and initialized in param.c +/* defined and initialized in param.c */ extern const int shape,sh_Npars; extern const double sh_pars[]; extern const int sym_type; extern const double lambda; extern double sizeX,dpl,a_eq; extern const int jagged; -extern const char shape_fname[]; +extern const char aggregate_file[]; extern char shapename[]; extern char save_geom_fname[]; extern const int volcor,save_geom; @@ -60,1119 +57,825 @@ extern const double gr_vf; extern double gr_d; extern const int gr_mat; extern int sg_format; -extern int store_grans; -// defined and initialized in timing.c +/* defined and initialized in timing.c */ extern TIME_TYPE Timing_Particle,Timing_Granul,Timing_Granul_comm; -// used in param.c -int volcor_used; // volume correction was actually employed -char sh_form_str[MAX_PARAGRAPH]; // string for log file with shape parameters -size_t gr_N; // number of granules -double gr_vf_real; // actual granules volume fraction -double mat_count[MAX_NMAT+1]; // number of dipoles in each domain - -// LOCAL VARIABLES - -static const char geom_format[]="%d %d %d\n"; // format of the geom file -static const char geom_format_ext[]="%d %d %d %d\n"; // extended format of the geom file -/* C99 allows use of %zu for size_t variables, but this is not supported by MinGW due to dependence - * on Microsoft libraries - */ -static const char ddscat_format[]="%ld %d %d %d %d %d %d\n";// DDSCAT shape format (FRMFIL) -// ratio of scatterer volume to enclosing cube; used for dpl correction and initialization by a_eq -static double volume_ratio; -static double Ndip; // total number of dipoles (in a circumscribing cube) -static double dpl_def; // default value of dpl -static int minX,minY,minZ; // minimum values of dipole positions in dipole file -static FILE *dipfile; // handle of dipole file -static int read_format; // format of dipole file, which is read -static char linebuf[BUF_LINE]; // buffer for reading lines from dipole file -double cX,cY,cZ; // center for DipoleCoord, it is sometimes used in PlaceGranules -// shape parameters +/* used in param.c */ +int volcor_used; /* volume correction was actually employed */ +char sh_form_str[MAX_PARAGRAPH]; /* string for log file with shape parameters */ +size_t gr_N; /* number of granules */ +double gr_vf_real; /* actual granules volume fraction */ +double mat_count[MAX_NMAT+1]; /* number of dipoles in each domain */ + +/* LOCAL VARIABLES */ + +static const char geom_format[]="%d %d %d\n"; /* format of the geom file */ +static const char geom_format_ext[]="%d %d %d %d\n"; /* extended format of the geom file */ +static const char ddscat_format[]="%d %d %d %d %d %d %d\n";/* ddscat shape format (FRMFIL) */ +static double volume_ratio; /* ratio of scatterer volume to enclosing cube; + used for dpl correction and initialization by a_eq */ +static double Ndip; /* total number of dipoles (in a circumscribing cube) */ +static double dpl_def; /* default value of dpl */ +static int minX,minY,minZ; /* minimum values of dipole positions in dipole file */ +static FILE *dipfile; /* handle of dipole file */ +static int read_format; /* format of dipole file, which is read */ +static char linebuf[BUF_LINE]; /* buffer for reading lines from dipole file */ +/* shape parameters */ static double coat_ratio,coat_x,coat_y,coat_z,coat_r2; -static double ad2,egnu,egeps,egz0; // for egg +static double ad2,egnu,egeps,egz0; /* for egg */ static double hdratio,invsqY,invsqZ,haspY,haspZ; -static double P,Q,R,S; // for RBC -// for axisymmetric; all coordinates defined here are relative -static double *contSegRoMin,*contSegRoMax,*contRo,*contZ; -static double contCurRo, contCurZ, contRoSqMin; -static int contNseg; -struct segment { - bool single; // whether segment consists of a single joint - int first; // index of the first point in the segment - int last; // index of the last point in the segment - double zmin; // minimum z-coordinate of the segment points - double zmax; // maximum z-coordinate of the segment points - double romid; // ro-coordinate of the point in the middle - struct segment *left; // pointer to left subsegment - struct segment *right; // pointer to right subsegment - double slope; // only for single; (z[i+1]-z[i])/(ro[i+1]-ro[i]) - double add; // only for single; ro[i](1-slope); -}; -struct segment *contSeg; +static double P,Q,R,S; /* for RBC */ /* TO ADD NEW SHAPE - * Add here all internal variables (aspect ratios, etc.), which you initialize in InitShape() - * and use in MakeParticle() afterwards. If you need local, intermediate variables, put them into - * the beginning of the corresponding function. - * Add descriptive comments, use 'static'. - */ + Add here all internal variables (aspect ratios, etc.), which you initialize in InitShape() + and use in MakeParticle() afterwards. If you need local, intermediate variables, put them into + the beginning of the corresponding function. + Add descriptive comments, use 'static'. */ -// temporary arrays before their real counterparts are allocated +/* temporary arrays before their real counterparts are allocated */ static unsigned char *material_tmp; static double *DipoleCoord_tmp; static unsigned short *position_tmp; -//============================================================ +/*============================================================*/ static void SaveGeometry(void) -// saves dipole configuration to .geom file + /* saves dipole configuration to .geom file */ { - char fname[MAX_FNAME]; - FILE *geom; - size_t i,j; - int mat; - - // create save_geom_fname if not specified - if (save_geom_fname[0]==0) sprintf(save_geom_fname,"%s.geom",shapename); - // automatically change format if needed - if (sg_format==SF_TEXT && Nmat>1) sg_format=SF_TEXT_EXT; - // choose filename + char fname[MAX_FNAME]; + FILE *geom; + size_t i,j; + int mat; + + /* create save_geom_fname if not specified */ + if (save_geom_fname[0]==0) + sprintf(save_geom_fname,"%s.geom",shapename); + /* automatically change format if needed */ + if (sg_format==SF_TEXT && Nmat>1) sg_format=SF_TEXT_EXT; + /* choose filename */ #ifdef PARALLEL - sprintf(fname,"%s/" F_GEOM_TMP,directory,ringid); + sprintf(fname,"%s/" F_GEOM_TMP,directory,ringid); #else - sprintf(fname,"%s/%s",directory,save_geom_fname); + sprintf(fname,"%s/%s",directory,save_geom_fname); #endif - geom=FOpenErr(fname,"w",ALL_POS); - // print head of file + geom=FOpenErr(fname,"w",ALL_POS); + /* print head of file */ #ifdef PARALLEL - if (ringid==0) { // this condition can be different from being ROOT + if (ringid==0) { /* this condition can be different from being ROOT */ #endif - if (sg_format==SF_TEXT || sg_format==SF_TEXT_EXT) { - fprintf(geom,"#generated by ADDA v." ADDA_VERSION "\n" - "#shape: '%s'\n" - "#box size: %dx%dx%d\n",shapename,boxX,boxY,boxZ); - if (sg_format==SF_TEXT_EXT) fprintf(geom,"Nmat=%d\n",Nmat); - } - else if (sg_format==SF_DDSCAT) - fprintf(geom,"shape: '%s'; box size: %dx%dx%d; generated by ADDA v." ADDA_VERSION "\n" - "%0.f = NAT\n" - "1 0 0 = A_1 vector\n" - "0 1 0 = A_2 vector\n" - "1 1 1 = lattice spacings (d_x,d_y,d_z)/d\n" - "JA IX IY IZ ICOMP(x,y,z)\n",shapename,boxX,boxY,boxZ,nvoid_Ndip); + if (sg_format==SF_TEXT || sg_format==SF_TEXT_EXT) { + fprintf(geom,"#generated by ADDA v." ADDA_VERSION "\n"\ + "#shape: '%s'\n"\ + "#box size: %dx%dx%d\n",shapename,boxX,boxY,boxZ); + if (sg_format==SF_TEXT_EXT) fprintf(geom,"Nmat=%d\n",Nmat); + } + else if (sg_format==SF_DDSCAT) + fprintf(geom,"shape: '%s'; box size: %dx%dx%d; generated by ADDA v." ADDA_VERSION "\n"\ + "%0.f = NAT\n"\ + "1 0 0 = A_1 vector\n"\ + "0 1 0 = A_2 vector\n"\ + "1 1 1 = lattice spacings (d_x,d_y,d_z)/d\n"\ + "JA IX IY IZ ICOMP(x,y,z)\n",shapename,boxX,boxY,boxZ,nvoid_Ndip); #ifdef PARALLEL - } // end of if + } /* end of if */ #endif - // save geometry - if (sg_format==SF_TEXT) for(i=0;i %d)", - fname,*line,BUF_LINE-1); - } - return res; + char *res; + + res=fgets(linebuf,BUF_LINE,file); + if (res!=NULL) { + (*line)++; + if (strchr(linebuf,'\n')==NULL && !feof(file)) LogError(EC_ERROR,ONE,s_fname,s_line, + "Buffer overflow while scanning lines in file '%s' (size of line %d > %d)", + fname,*line,BUF_LINE-1); + } + return res; } -//=========================================================== +/*===========================================================*/ INLINE void SkipNLines(FILE *file,int n) -// skips n lines from the file starting from current position in a file + /* skips n lines from the file starting from current position in a file */ { - while (n>0) { - SkipFullLine(file); - n--; - } + while (n>0) { + SkipFullLine(file); + n--; + } } -//=========================================================== +/*===========================================================*/ static int SkipComments(FILE *file) -/* skips comments (#...), all lines, starting from current position in a file. - * returns number of lines skipped - */ + /* skips comments (#...), all lines, starting from current position in a file + returns number of lines skipped */ { - int lines=0,ch; + int lines=0,ch; - while ((ch=fgetc(file))=='#') { - SkipFullLine(file); - lines++; - } - if (ch!=EOF) ungetc(ch,file); + while ((ch=fgetc(file))=='#') { + SkipFullLine(file); + lines++; + } + if (ch!=EOF) ungetc(ch,file); - return lines; + return lines; } -//=========================================================== -#define DDSCAT_HL 6 // number of header lines in DDSCAT format +/*===========================================================*/ +#define DDSCAT_HL 6 /* number of header lines in DDSCAT format */ -static void InitDipFile(const char *fname,int *bX,int *bY,int *bZ,int *Nm) -/* read dipole file first to determine box sizes and Nmat; input is not checked for very large - * numbers (integer overflows) to increase speed; this function opens file for reading, the file is - * closed in ReadDipFile. - */ +static void InitDipFile(const char *fname,int * bX,int *bY,int *bZ,int *Nm) + /* read dipole file first to determine box sizes and Nmat; + input is not checked for very large numbers (integer overflows) to increase speed + this funstion opens file for reading, the file is closed in ReadDipFile */ { - int x,y,z,mat,line,scanned,mustbe,skiplines,anis_warned; - long tl; // dumb variable - int t2,t3; // dumb variables - int maxX,maxY,maxZ,maxN; - char formtext[MAX_LINE]; - - dipfile=FOpenErr(fname,"r",ALL_POS); - read_format=UNDEF; - /* test for DDSCAT format; in not-DDSCAT format, the line scanned below may be a long comment; - * therefore we first skip all comments - */ - line=SkipComments(dipfile); - if (line<=DDSCAT_HL) { - SkipNLines(dipfile,DDSCAT_HL-line); - if (FgetsError(dipfile,fname,&line,POSIT)!=NULL - && sscanf(linebuf,ddscat_format,&tl,&x,&y,&z,&mat,&t2,&t3)==7) { - read_format=SF_DDSCAT; - strcpy(formtext,"DDSCAT format (FRMFIL)"); - mustbe=7; - line=DDSCAT_HL; - fseek(dipfile,0,SEEK_SET); - SkipNLines(dipfile,line); - } - } - // if format is not yet determined, test for ADDA text formats - if (read_format==UNDEF) { - fseek(dipfile,0,SEEK_SET); - line=SkipComments(dipfile); - /* scanf and analyze Nmat; if there is blank line between comments and Nmat, it fails later; - * the value of Nmat obtained here is not actually relevant, the main factor is maximum - * domain number among all dipoles. - */ - scanned=fscanf(dipfile,"Nmat=%d\n",Nm); - if (scanned==EOF) LogError(EC_ERROR,ONE_POS,"No dipole positions are found in %s",fname); - else if (scanned==0) { // no "Nmat=..." - read_format=SF_TEXT; - strcpy(formtext,"ADDA text format (single domain)"); - *Nm=1; - mustbe=3; - } - else { // "Nmat=..." present - read_format=SF_TEXT_EXT; - strcpy(formtext,"ADDA text format (multi-domain)"); - mustbe=4; - line++; - } - } - // scan main part of the file - skiplines=line; - maxX=maxY=maxZ=INT_MIN; - minX=minY=minZ=INT_MAX; - maxN=1; - anis_warned=FALSE; - // reading is performed in lines - while(FgetsError(dipfile,fname,&line,POSIT)!=NULL) { - // scan numbers in a line - if (read_format==SF_TEXT) scanned=sscanf(linebuf,geom_format,&x,&y,&z); - else if (read_format==SF_TEXT_EXT) scanned=sscanf(linebuf,geom_format_ext,&x,&y,&z,&mat); - // for ddscat format, only first material is used, other two are ignored - else if (read_format==SF_DDSCAT) { - scanned=sscanf(linebuf,ddscat_format,&tl,&x,&y,&z,&mat,&t2,&t3); - if (!anis_warned && (t2!=mat || t3!=mat)) { - LogError(EC_WARN,ONE_POS,"Anisotropic dipoles are detected in file %s (first on " - "line %d). ADDA ignores this anisotropy, using only the identifier of " - "x-component of refractive index as domain number",fname,line); - anis_warned=TRUE; - } - } - // if sscanf returns EOF, that is a blank line -> just skip - if (scanned!=EOF) { - if (scanned!=mustbe) // this in most cases indicates wrong format - LogError(EC_ERROR,ONE_POS,"%s was detected, but error occurred during scanning " - "of line %d from dipole file %s",formtext,line,fname); - if (read_format!=SF_TEXT) { - if (mat<=0) LogError(EC_ERROR,ONE_POS,"%s was detected, but nonpositive material " - "number (%d) encountered during scanning of line %d from dipole file %s", - formtext,mat,line,fname); - else if (mat>maxN) maxN=mat; - } - // update maxima and minima - if (x>maxX) maxX=x; - if (xmaxY) maxY=y; - if (ymaxZ) maxZ=z; - if (z just skip */ + if (scanned!=EOF) { + if (scanned!=mustbe) /* this in most cases indicates wrong format */ + LogError(EC_ERROR,ONE_POS,"%s was detected, but error occured during scaning of line %d "\ + "from dipole file %s",formtext,line,fname); + if (read_format!=SF_TEXT) { + if (mat<=0) LogError(EC_ERROR,ONE_POS,"%s was detected, but nonpositive material number "\ + "(%d) encountered during scaning of line %d from dipole file %s",formtext,mat,line,fname); + else if (mat>maxN) maxN=mat; + } + /* update maximums and minimums */ + if (x>maxX) maxX=x; + if (xmaxY) maxY=y; + if (ymaxZ) maxZ=z; + if (z just skip - if (scanned!=EOF) { - // shift dipole position to be nonnegative - x0-=minX; - y0-=minY; - z0-=minZ; - // initialize box jagged*jagged*jagged instead of one dipole - for (z=jagged*z0;z=local_z0 && z just skip */ + if (scanned!=EOF) { + /* shift dipole position to be nonnegative */ + x0-=minX; + y0-=minY; + z0-=minZ; + /* initialize box jagged*jagged*jagged instead of one dipole */ + for (z=jagged*z0;z=local_z0 && zfirst]==contRo[seg->first+1]) (seg->first)++; - while (contRo[seg->last-1]==contRo[seg->last]) (seg->last)--; - if (seg->first+1 == seg->last) { // segment with a single fragment - seg->single=true; - seg->zmin=MIN(contZ[seg->first],contZ[seg->last]); - seg->zmax=MAX(contZ[seg->first],contZ[seg->last]); - seg->slope=(contZ[seg->last]-contZ[seg->first])/(contRo[seg->last]-contRo[seg->first]); - seg->add=contZ[seg->first]-contRo[seg->first]*seg->slope; - } - else { // divide segment into two, and initialize each of them - seg->single=false; - i=(seg->first+seg->last)/2; - seg->romid=contRo[i]; - // construct subsegments - s1=ALLOCATE_SEGMENTS(1); - s2=ALLOCATE_SEGMENTS(1); - s1->first=seg->first; - s1->last=s2->first=i; - s2->last=seg->last; - // initialize subsegments - InitContourSegment(s1,increasing); - InitContourSegment(s2,increasing); - // calculate zmax and zmin - seg->zmax=MAX(s1->zmax,s2->zmax); - seg->zmin=MIN(s1->zmin,s2->zmin); - // assign new segments to left and right based on 'increasing' - if (increasing) { - seg->left=s1; - seg->right=s2; - } - else { - seg->left=s2; - seg->right=s1; - } - } -} - -//=========================================================== -#define CHUNK_SIZE 128 // how many numbers are allocated at once for adjustable arrays - -static void InitContour(const char *fname,double *ratio,double *shSize) -/* Reads a contour from the file, rotates it so that it starts from a local minimum in ro, - * then divides it into monotonic (over ro) segments. It produces data, which are later used to - * test each dipole for being inside the contour. Segments are either increasing or non-decreasing. - */ -{ - int line; // current line number - int nr; // number of contour points read from the file - int size; // current size of the allocated memory for contour - int i,j,scanned; - double *bufRo,*bufZ; // temporary buffers - int *index; - double ro,z,romin,romax,zmin,zmax,mult,zmid; - FILE* file; - bool increasing; - - D("InitContour has started"); - // Read contour from file - file=FOpenErr(fname,"r",ALL_POS); - line=SkipComments(file); - size=CHUNK_SIZE; - MALLOC_VECTOR(bufRo,double,size,ALL); - MALLOC_VECTOR(bufZ,double,size,ALL); - nr=0; - // reading is performed in lines - while(FgetsError(file,fname,&line,POSIT)!=NULL) { - // scan numbers in a line - scanned=sscanf(linebuf,"%lf %lf",&ro,&z); - // if sscanf returns EOF, that is a blank line -> just skip - if (scanned!=EOF) { - if (scanned!=2) // this in most cases indicates wrong format - LogError(EC_ERROR,ONE_POS,"Error occurred during scanning of line %d from contour " - "file %s",line,fname); - // check for consistency of input - if (ro<0) LogError(EC_ERROR,ONE_POS,"Negative ro-coordinate is found on line %d in " - "contour file %s",line,fname); - // update extreme values - if (nr==0) { - zmax=zmin=z; - romax=romin=ro; - } - else { - if (z>zmax) zmax=z; - if (zromax) romax=ro; - if (ro= size) { - size+=CHUNK_SIZE; - REALLOC_DVECTOR(bufRo,size,ALL); - REALLOC_DVECTOR(bufZ,size,ALL); - } - bufRo[nr]=ro; - bufZ[nr]=z; - nr++; - } - } - FCloseErr(file,fname,ALL_POS); - // Check number of points read - if (nr<3) LogError(EC_ERROR,ONE_POS, - "Contour from file %s contains less than three points",fname); - - // Determine initial point with local minimum ro[i-1]>=ro[i]=bufRo[i+1]) i++; - if (i==0) { // first point is a minimum candidate - if (bufRo[0]>bufRo[nr-1]) { // if required, search backwards; guaranteed to converge - i=nr-1; - while (bufRo[i]>bufRo[i-1]) i--; - } - } - // if the whole contour is non-decreasing, check for constancy - else if (i==nr-1 && bufRo[nr-1]==bufRo[0]) LogError(EC_ERROR,ONE_POS, - "Contour from file %s has zero area. Hence the scatterer is void",fname); - /* Construct working contour so that its first point = last and is a local minimum. It is done - * by rotating buf and adding one extra point. Then free the buffer. - */ - MALLOC_VECTOR(contRo,double,nr+1,ALL); - memcpy(contRo,bufRo+i,(nr-i)*sizeof(double)); - memcpy(contRo+nr-i,bufRo,i*sizeof(double)); - contRo[nr]=contRo[0]; - Free_general(bufRo); - // same for Z vectors - MALLOC_VECTOR(contZ,double,nr+1,ALL); - memcpy(contZ,bufZ+i,(nr-i)*sizeof(double)); - memcpy(contZ+nr-i,bufZ,i*sizeof(double)); - contZ[nr]=contZ[0]; - Free_general(bufZ); - // scale coordinates to be relative to total diameter, and centered (by z) around 0 - mult=1/(2*romax); - zmid=(zmax+zmin)/2; - *ratio=(zmax-zmin)*mult; - *shSize=2*romax; - contRoSqMin=romin*romin*mult*mult; - for (i=0;i<=nr;i++) { - contRo[i]*=mult; - contZ[i]=(contZ[i]-zmid)*mult; - } - - /* divide the contour into the segments; actually only the index is constructed marking end - * points of the segments - */ - MALLOC_VECTOR(index,int,nr+1,ALL); // this is enough, even if all segments are of one joint - index[0]=0; - i=j=1; - increasing=true; - while (izmin) return true; - else if (contCurZ > seg->zmax) return false; - else if (seg->single) return (contCurZ < seg->add + contCurRo*seg->slope); - else seg=(contCurRoromid ? seg->left : seg->right); - } -} - -//========================================================== - -void FreeContourSegment(struct segment *seg) -/* recursively frees memory allocated for contour segments - * Recursive function calls incurs certain overhead, however here it is not critical. - */ -{ - if (!(seg->single)) { - FreeContourSegment(seg->left); - FreeContourSegment(seg->right); - } -} - -//========================================================== - -#define KEY_LENGTH 2 // length of key for initialization of random generator -#define MAX_ZERO_FITS 1E4 // maximum number of zero fits in a row (each - many granules) -#define MAX_FALSE_SKIP 10 // number of false skips in granule placement to complete the set -#define MAX_FALSE_SKIP_SMALL 10 // the same for small granules -#define MAX_GR_SET USHRT_MAX // maximum size of granule set -#define MIN_CELL_SIZE 4.0 // minimum cell size for small granules +#define KEY_LENGTH 2 /* length of key for initialization of random generator */ +#define MAX_ZERO_FITS 1E4 /* maximum number of zero fits in a row (each - many granules) */ +#define MAX_FALSE_SKIP 10 /* number of false skips in granule placement to complete the set */ +#define MAX_FALSE_SKIP_SMALL 10 /* the same for small granules */ +#define MAX_GR_SET USHRT_MAX /* maximum size of granule set */ +#define MIN_CELL_SIZE 4.0 /* minimum cell size for small granules */ INLINE int CheckCell(const double *gr,const double *vgran,const unsigned short *tree_index, const double Di2,const int start,int *fits) -// function that checks whether granule intersects anything in the cell + /* function that checks whether granule intersects anything in the cell */ { - int index,last,index1; - double t1,t2,t3; - - last=index=start; - while (index!=MAX_GR_SET && (*fits)) { - last=index; - index1=3*index; - t1=gr[0]-vgran[index1]; - t2=gr[1]-vgran[index1+1]; - t3=gr[2]-vgran[index1+2]; - if ((t1*t1+t2*t2+t3*t3)MIN(boxX,MIN(boxY,boxZ))) LogError(EC_WARN,ONE_POS, - "Granule size is larger than minimum particle dimension"); - x0=R-0.5; - x1=boxX-R-0.5; - y0=R-0.5; - y1=boxY-R-0.5; - z0=R-0.5; - z1=boxZ-R-0.5; - // initialize auxiliary grid - CheckOverflow(MAX(boxX,MAX(boxY,boxZ))*10/Di,ONE_POS,"PlaceGranules()"); - tmp1=sqrt(3)/Di; - gX=(int)ceil((x1-x0)*tmp1); - gdX=(x1-x0)/gX; - gY=(int)ceil((y1-y0)*tmp1); - gdY=(y1-y0)/gY; - gZ=(int)ceil((z1-z0)*tmp1); - gdZ=(z1-z0)/gZ; - sm_gr=(gdX<2 || gdY<2 || gdZ<2); // sets the discrimination for small or large granules - if (sm_gr) { - PRINTZ("Using algorithm for small granules\n"); - // redefine auxiliary grid - tmp1=1/MAX(2*Di,MIN_CELL_SIZE); - gX=(int)floor((x1-x0)*tmp1); - gdX=(x1-x0)/gX; - gY=(int)floor((y1-y0)*tmp1); - gdY=(y1-y0)/gY; - gZ=(int)floor((z1-z0)*tmp1); - gdZ=(z1-z0)/gZ; - } - else { - PRINTZ("Using algorithm for large granules\n"); - gX2=2*gX; - gdXh=gdX/2; - gY2=2*gY; - gdYh=gdY/2; - gZ2=2*gZ; - gdZh=gdZ/2; - /* this sets maximum distance of neighboring cells to check; condition gdX3 can only occur if - * gX<=2 and then it doesn't make sense to take bigger sx. Absolutely analogous for y, z. - */ - if (gdX=ginZ[1]) indZ++; - kd1=MIN(ginZ[locgZ2],local_z1_coer); - } - n=count=count_gr=false_count=0; - nd=0; - // crude estimate of the probability to place a small granule into domain - if (sm_gr) overhead=Ndip/mat_count[gr_mat]; - else overhead=1; - // main cycle - while (nMAX_FALSE_SKIP_SMALL) break; - } - // real number of placed granules for this set - cur_Ngr=ig; - } - } - else { // large granules - // generate domain pattern - if (locgZ!=0) { - for (i=0;itmp1) cur_Ngr=(int)ceil(tmp1); - // generate points and quick check - ig=false_count=0; - for (ui=0;uigZ) i1=gX; - if ((j0=indY-sy)<0) j0=0; - if ((j1=indY+sy+1)>gY) j1=gY; - if ((k0=indZ-sz)<0) k0=0; - if ((k1=indZ+sz+1)>gZ) k1=gZ; - dom_index2=k0*gXY; - for (k=k0;kMAX_FALSE_SKIP) break; - } - } - // real number of placed granules for this set - cur_Ngr=ig; - } - } // end of large granules - // cast to all processors - MyBcast(&cur_Ngr,int_type,1,&Timing_Granul_comm); - MyBcast(vgran,double_type,3*cur_Ngr,&Timing_Granul_comm); - count_gr+=cur_Ngr; - // final check if granules belong to the domain - for (ig=0;ig=gr_N) break; - } - } - // save correct granule positions to file - if (store_grans && ringid==ROOT) for (ig=0;igMAX_ZERO_FITS) { - MyInnerProduct(&nd,double_type,1,&Timing_Granul_comm); - LogError(EC_ERROR,ONE_POS,"The granule generator failed to reach required volume " - "fraction (%g) of granules. %zu granules were successfully placed up to a " - "volume fraction of %g.",gr_vf,n,nd/mat_count[gr_mat]); - } - } - } - /* conversions to (unsigned long) are needed (to remove warnings) because %z printf argument is - * not yet supported by all target compiler environments - */ - PRINTZ("Granule generator: total random placements= %lu (efficiency 1 = %g)\n" - " possible granules= %lu (efficiency 2 = %g)\n", - (unsigned long)count,count_gr/(double)count,(unsigned long)count_gr, - gr_N/(double)count_gr); - MyInnerProduct(&nd,double_type,1,&Timing_Granul_comm); - // free everything - if (ringid==ROOT) { - Free_general(occup); - if (sm_gr) Free_general(tree_index); - else Free_general(dom); - } - else if (!sm_gr && locgZ!=0) Free_general(dom); - FreeGranulComm(sm_gr); - Free_general(vgran); - Free_general(vfit); - if (!sm_gr && locgZ!=0) { - Free_general(ginX); - Free_general(ginY); - Free_general(ginZ); - } - // close granule file if needed and print info - if (store_grans && ringid==ROOT) { - FCloseErr(file,fname,ONE_POS); - printf("Granule coordinates saved to file\n"); - } - return nd; + int i,j,k,zerofit,last; + size_t n,count,count_gr,false_count,ui; + size_t boxXY; + double nd; /* number of dipoles occupied by granules */ + int index,index1,index2; /* indices for dipole grid */ + int dom_index,dom_index1,dom_index2; /* indices for auxilliary grid */ + int gX,gY,gZ; /* auxilliary grid dimensions */ + size_t gXY,gr_gN; /* ... and their products */ + size_t avail; /* number of available (free) domain cells */ + int gX2,gY2,gZ2,locgZ2; + int i0,i1,j0,j1,k0,k1; + int fits; + int cur_Ngr,ig,max_Ngr; /* number of granules in a current set, index, and maximum set size */ + double gdX,gdY,gdZ,gdXh,gdYh,gdZh; /* auxilliary grid cell sizes and their halfs (h) */ + int locz0,locz1,locgZ,gr_locgN; + double R,R2,Di,Di2; /* radius and dimater of granule, and their squares */ + double x0,x1,y0,y1,z0,z1; /* where to put random number (inner box) */ + int id0,id1,jd0,jd1,kd0,kd1; /* dipoles limit that fall inside inner box */ + int Nfit; /* number of succesfully placed granules in a current set */ + double overhead; /* estimate of the overhead needed to have exactly needed N of granules */ + double tmp1,tmp2,tmp3,t1,t2,t3; + int sx,sy,sz; /* maximum shifts for checks of neighboring cells in auxilliary grid + for 'small' it is the shift in index */ + unsigned long key[KEY_LENGTH]; /* key to initialize random number generator */ + unsigned char *dom; /* information about the domain on a granule grid */ + unsigned short *occup; /* information about the occupied cells */ + int sm_gr; /* whether granules are small (then simpler algorithm is used) */ + unsigned short *tree_index; /* index for traversing granules inside one cell (for small) */ + double *vgran; /* coordinates of a set of granules */ + char *vfit; /* results of granule fitting on the grid (boolean) */ + int *ginX,*ginY,*ginZ; /* indices to find dipoles inside auxilliary grid */ + int indX,indY,indZ; /* indices for doubled auxilliary grid */ + int bit; /* bit position in char of 'dom' */ + double gr[3]; /* coordinates of a single granule */ + + /* set variables; consider jagged */ + Di=gr_d/(gridspace*jagged); + if (Di<1) LogError(EC_WARN,ONE_POS,"Granule diameter is smaller than dipole size. "\ + "It is recommended to increase resolution"); + R=Di/2; + R2=R*R; + Di2=4*R2; + boxXY=boxX*(size_t)boxY; + /* inner box */ + if (Di>MIN(boxX,MIN(boxY,boxZ))) LogError(EC_WARN,ONE_POS, + "Granule size is larger than minimum particle dimension"); + x0=R-0.5; + x1=boxX-R-0.5; + y0=R-0.5; + y1=boxY-R-0.5; + z0=R-0.5; + z1=boxZ-R-0.5; + /* initialize auxilliary grid */ + CheckOverflow(MAX(boxX,MAX(boxY,boxZ))*10/Di,ONE_POS,"PlaceGranules()"); + tmp1=sqrt(3)/Di; + gX=(int)ceil((x1-x0)*tmp1); + gdX=(x1-x0)/gX; + gY=(int)ceil((y1-y0)*tmp1); + gdY=(y1-y0)/gY; + gZ=(int)ceil((z1-z0)*tmp1); + gdZ=(z1-z0)/gZ; + sm_gr=(gdX<2 || gdY<2 || gdZ<2); /* sets the discrimination for small or large granules */ + if (sm_gr) { + PRINTZ("Using algorithm for small granules\n"); + /* redefine auxilliary grid */ + tmp1=1/MAX(2*Di,MIN_CELL_SIZE); + gX=(int)floor((x1-x0)*tmp1); + gdX=(x1-x0)/gX; + gY=(int)floor((y1-y0)*tmp1); + gdY=(y1-y0)/gY; + gZ=(int)floor((z1-z0)*tmp1); + gdZ=(z1-z0)/gZ; + } + else { + PRINTZ("Using algorithm for large granules\n"); + gX2=2*gX; + gdXh=gdX/2; + gY2=2*gY; + gdYh=gdY/2; + gZ2=2*gZ; + gdZh=gdZ/2; + /* this sets maximum distance of neighboring cells to check; condition gdX3 can only occur if + gX<=2 and then it doesn't make sense to take bigger sx. Absolutely analogous for y, z. */ + if (gdX=ginZ[1]) indZ++; + kd1=MIN(ginZ[locgZ2],local_z1_coer); + } + n=count=count_gr=false_count=0; + nd=0; + /* crude estimate of the probability to place a small granule into domain */ + if (sm_gr) overhead=Ndip/mat_count[gr_mat]; + else overhead=1; + /* main cycle */ + while (nMAX_FALSE_SKIP_SMALL) break; + } + /* real number of placed granules for this set */ + cur_Ngr=ig; + } + } + else { /* large granules */ + /* generate domain pattern */ + if (locgZ!=0) { + for (i=0;itmp1) cur_Ngr=(int)ceil(tmp1); + /* generate points and quick check */ + ig=false_count=0; + for (ui=0;uigZ) i1=gX; + if ((j0=indY-sy)<0) j0=0; + if ((j1=indY+sy+1)>gY) j1=gY; + if ((k0=indZ-sz)<0) k0=0; + if ((k1=indZ+sz+1)>gZ) k1=gZ; + dom_index2=k0*gXY; + for (k=k0;kMAX_FALSE_SKIP) break; + } + } + /* real number of placed granules for this set */ + cur_Ngr=ig; + } + } /* end of large granules */ + /* cast to all processors */ + MyBcast(&cur_Ngr,int_type,1,&Timing_Granul_comm); + MyBcast(vgran,double_type,3*cur_Ngr,&Timing_Granul_comm); + count_gr+=cur_Ngr; + /* final check if granules belong to the domain */ + for (ig=0;ig=gr_N) break; + } + } + Nfit=n-Nfit; + /* overhead is estimated based on the estimation of mean value - 1*standard deviation + for the probability of fiting one granule. It is estimated from the Bernulli statistics + k out of n successful hits. M(p)=(k+1)/(n+2); s^2(p)=(k+1)(n-k+1)/(n+3)(n+2)^2 + M(p)-s(p)=[(k+1)/(n+2)]*[1-sqrt((n-k+1)/(k+1)(n+3))]; + overhead=1/latter */ + overhead=(cur_Ngr+2)/((1-sqrt((cur_Ngr-Nfit+1)/(double)((Nfit+1)*(cur_Ngr+3))))*(Nfit+1)); + if (Nfit!=0) zerofit=0; + else { + zerofit++; + /* check if taking too long */ + if (zerofit>MAX_ZERO_FITS) { + MyInnerProduct(&nd,double_type,1,&Timing_Granul_comm); + LogError(EC_ERROR,ONE_POS, + "The granule generator failed to reach required volume fraction (%g) of granules. "\ + "%u granules were successfully placed up to a volume fraction of %g", + gr_vf,n,nd/mat_count[gr_mat]); + } + } + } + PRINTZ("Granule generator: total random placements= %u (efficiency 1 = %g)\n"\ + " possible granules= %u (efficiency 2 = %g)\n", + count,count_gr/(double)count,count_gr,gr_N/(double)count_gr); + MyInnerProduct(&nd,double_type,1,&Timing_Granul_comm); + /* free everything */ + if (ringid==ROOT) { + Free_general(occup); + if (sm_gr) Free_general(tree_index); + else Free_general(dom); + } + else if (!sm_gr && locgZ!=0) Free_general(dom); + FreeGranulComm(sm_gr); + Free_general(vgran); + Free_general(vfit); + if (!sm_gr && locgZ!=0) { + Free_general(ginX); + Free_general(ginY); + Free_general(ginZ); + } + return nd; } #undef KEY_LENGTH #undef MAX_ZERO_FITS @@ -1180,690 +883,588 @@ static double PlaceGranules(void) #undef MAX_FALSE_SKIP_SMALL #undef MAX_GR_SET #undef MIN_CELL_SIZE -//========================================================== +/*==========================================================*/ static int FitBox(const int box) -/* finds the smallest value for which program would work (should be even and divide jagged); - * the limit is also checked - */ + /* finds the smallest value for which program would work + (should be even and divide jagged); the limit is also checked */ { - int res; + int res; - if (IS_EVEN(jagged)) res=jagged*((box+jagged-1)/jagged); - else res=2*jagged*((box+2*jagged-1)/(2*jagged)); - if (res>BOX_MAX) LogError(EC_ERROR,ONE_POS, - "Derived grid size (%d) is too large (>%d)",res,BOX_MAX); - return res; + if (jagged%2==0) res=jagged*((box+jagged-1)/jagged); + else res=2*jagged*((box+2*jagged-1)/(2*jagged)); + if (res>BOX_MAX) LogError(EC_ERROR,ONE_POS, + "Derived grid size (%d) is too large (>%d)",res,BOX_MAX); + return res; } -//========================================================== +/*==========================================================*/ void InitShape(void) -/* perform of initialization of symmetries and boxY, boxZ. Estimate the volume of the particle, when - * not discretized. Check whether enough refractive indices are specified. - */ + /* perform of initialization of symmetries and boxY, boxZ + * Estimate the volume of the particle, when not discretisized. + * Check whether enough refractive indices are specified + */ { - int n_boxX,n_boxY,n_boxZ; // new values for dimensions - double n_sizeX; // new value for size - double h_d,b_d,c_d,h2,b2,c2; - double yx_ratio,zx_ratio,tmp1,tmp2,tmp3; - double diskratio,aspectY,aspectZ; - double ad,ct,ct2; // cos(theta0) and its square - TIME_TYPE tstart; - int Nmat_need,i,temp; - int dpl_def_used; // if default dpl is used for grid initialization - bool box_det_sh; // if boxX is determined by shape itself - bool size_det_sh; // if size is determined by shape itself - bool size_given_cmd; // if size is given in the command line - char sizename[MAX_LINE]; // type of input size, used in diagnostic messages - /* TO ADD NEW SHAPE - * Add here all intermediate variables, which are used only inside this function. You may as - * well use 'tmp1'-'tmp3' variables defined above. - */ - - tstart=GET_TIME(); - - box_det_sh=(shape==SH_READ); - size_det_sh=(shape==SH_AXISYMMETRIC); - /* TO ADD NEW SHAPE - * If new shape defines dimension of the computational grid or absolute size of the particle, - * change corresponding definition in one of two lines above. In many cases this is not - * relevant. - */ - - size_given_cmd=(sizeX!=UNDEF || a_eq!=UNDEF); - if (sizeX!=UNDEF) strcpy(sizename,"size"); - else if (a_eq!=UNDEF) strcpy(sizename,"eq_rad"); - // check for redundancy of input data - if (dpl!=UNDEF) { - if (size_given_cmd) { - if (boxX!=UNDEF) PrintError("Extra information is given by setting '-dpl', '-grid', " - "and '-%s'",sizename); - else if (box_det_sh) PrintError("Extra information is given by setting both '-dpl' and " - "'-%s', while shape '%s' sets the size of the grid",sizename,shapename); - } - else if (size_det_sh) { - if (boxX!=UNDEF) PrintError("Extra information is given by setting '-dpl' and '-grid', " - "while shape '%s' sets the particle size",shapename); - // currently this can't happen, but may become relevant in the future - else if (box_det_sh) PrintError("Extra information is given by setting '-dpl', while " - "shape '%s' sets both the particle size and the size of the grid",shapename); - } - } - /* calculate default dpl - 10*sqrt(max(|m|)); - * for anisotropic each component is considered separately - */ - tmp2=0; - for (i=0;i0.25*(1-coat_ratio)*(1-coat_ratio)) - PrintErrorHelp("Inner sphere is not fully inside the outer"); - SPRINTZ(sh_form_str+strlen(sh_form_str), - "\n position of inner sphere center r/d= {%.10g,%.10g,%.10g}", - coat_x,coat_y,coat_z); - } - else coat_x=coat_y=coat_z=0; // initialize default values - coat_r2=0.25*coat_ratio*coat_ratio; - volume_ratio=PI_OVER_SIX; - if (coat_x!=0) symX=symR=FALSE; - if (coat_y!=0) symY=symR=FALSE; - if (coat_z!=0) symZ=FALSE; - yx_ratio=zx_ratio=1; - Nmat_need=2; - } - else if(shape==SH_CYLINDER) { - diskratio=sh_pars[0]; - TestPositive(diskratio,"height to diameter ratio"); - SPRINTZ(sh_form_str,"cylinder; diameter(d):%%.10g, height h/d=%.10g",diskratio); - hdratio=diskratio/2; - volume_ratio=PI_OVER_FOUR*diskratio; - yx_ratio=1; - zx_ratio=diskratio; - Nmat_need=1; - } - else if (shape==SH_EGG) { - /* determined by equation: (a/r)^2=1+nu*cos(theta)-(1-eps)cos^2(theta) - * or equivalently: a^2=r^2+nu*r*z-(1-eps)z^2. Parameters must be 00). Although it may overflow faster for nu->eps, volume_ratio (below) will - * overflow even faster. It is used to shift coordinates from the computational reference - * frame (centered at z0) to the natural one - */ - egz0=-ad*egnu*(tmp1*tmp1*tmp2*tmp2)/(tmp1+tmp2); - /* (V/d^3)=(4*pi/3)*(a/d)^3*{[2(1-eps)-nu]/sqrt(eps+nu)+[2(1-eps)+nu]/sqrt(eps-nu)}/ - * /[nu^2+4(1-eps)] - */ - volume_ratio=FOUR_PI_OVER_THREE*ad2*ad*((tmp3-egnu)*tmp1+(tmp3+egnu)*tmp2) - /(egnu*egnu+2*tmp3); - SPRINTZ(sh_form_str,"egg; diameter(d):%%.10g, epsilon=%.10g, nu=%.10g, a/d=%.10g", - egeps,egnu,ad); - Nmat_need=1; - yx_ratio=1; - zx_ratio=ad*(tmp1+tmp2); // (a/d)*[1/sqrt(eps+nu)+1/sqrt(eps-nu)] - } - else if (shape==SH_ELLIPSOID) { - aspectY=sh_pars[0]; - TestPositive(aspectY,"aspect ratio y/x"); - aspectZ=sh_pars[1]; - TestPositive(aspectZ,"aspect ratio z/x"); - SPRINTZ(sh_form_str,"ellipsoid; size along x-axis:%%.10g, aspect ratios y/x=%.10g, " - "z/x=%.10g",aspectY,aspectZ); - if (aspectY!=1) symR=FALSE; - // set inverse squares of aspect ratios - invsqY=1/(aspectY*aspectY); - invsqZ=1/(aspectZ*aspectZ); - volume_ratio=PI_OVER_SIX*aspectY*aspectZ; - yx_ratio=aspectY; - zx_ratio=aspectZ; - Nmat_need=1; - } - else if (shape==SH_LINE) { - STRCPYZ(sh_form_str,"line; length:%g"); - symY=symZ=symR=FALSE; - n_boxY=n_boxZ=jagged; - yx_ratio=zx_ratio=UNDEF; - volume_ratio=UNDEF; - Nmat_need=1; - } - else if(shape==SH_RBC) { - /* three-parameter shape; developed by K.A.Semyanov,P.A.Tarasov,P.A.Avrorov - * based on work by P.W.Kuchel and E.D.Fackerell, "Parametric-equation representation - * of biconcave erythrocytes," Bulletin of Mathematical Biology 61, 209-220 (1999). - * ro^4+2S*ro^2*z^2+z^4+P*ro^2+Q*z^2+R=0, ro^2=x^2+y^2, P,Q,R,S are determined by d,h,b,c - * given in the command line. - */ - h_d=sh_pars[0]; - TestPositive(h_d,"ratio of maximum width to diameter"); - b_d=sh_pars[1]; - TestNonNegative(b_d,"ratio of minimum width to diameter"); - if (h_d<=b_d) PrintErrorHelp("given RBC is not biconcave; maximum width is in the center"); - c_d=sh_pars[2]; - TestRangeII(c_d,"relative diameter of maximum width",0,1); - SPRINTZ(sh_form_str, - "red blood cell; diameter(d):%%.10g, maximum and minimum width h/d=%.10g, b/d=%.10g\n" - " diameter of maximum width c/d=%.10g",h_d,b_d,c_d); - // calculate shape parameters - h2=h_d*h_d; - b2=b_d*b_d; - c2=c_d*c_d; - /* P={(b/d)^2*[c^4/(h^2-b^2)-h^2]-d^2}/4; Q=(d/b)^2*(P+d^2/4)-b^2/4; R=-d^2*(P+d^2/4)/4; - * S=-(2P+c^2)/h^2; here P,Q,R,S are made dimensionless dividing by respective powers of d - * Calculation is performed so that Q is well defined even for b=0. - */ - tmp1=((c2*c2/(h2-b2))-h2)/4; - P=b2*tmp1-0.25; - Q=tmp1-(b2/4); - R=-b2*tmp1/4; - S=-(2*P+c2)/h2; - yx_ratio=1; - zx_ratio=h_d; - volume_ratio=UNDEF; - Nmat_need=1; - } - else if (shape==SH_READ) { - SPRINTZ(sh_form_str,"specified by file %s; size along x-axis:%%.10g",shape_fname); - symX=symY=symZ=symR=FALSE; // input file is assumed asymmetric - InitDipFile(shape_fname,&n_boxX,&n_boxY,&n_boxZ,&Nmat_need); - yx_ratio=zx_ratio=UNDEF; - volume_ratio=UNDEF; - } - else if (shape==SH_SPHERE) { - STRCPYZ(sh_form_str,"sphere; diameter:%.10g"); - volume_ratio=PI_OVER_SIX; - yx_ratio=zx_ratio=1; - Nmat_need=1; - } - else if (shape==SH_SPHEREBOX) { - coat_ratio=sh_pars[0]; - TestRangeII(coat_ratio,"sphere diameter/cube edge ratio",0,1); - SPRINTZ(sh_form_str, - "sphere in cube; size of cube edge(a):%%.10g, diameter of sphere d/a=%.10g",coat_ratio); - coat_r2=0.25*coat_ratio*coat_ratio; - yx_ratio=zx_ratio=1; - volume_ratio=1; - Nmat_need=2; - } - /* TO ADD NEW SHAPE - * add an option here (in the end of 'else if' sequence). Identifier ('SH_...') should be - * defined in const.h. The option should - * 1) save all the input parameters from array 'sh_pars' to local variables - * (defined in the beginning of this source files) - * 2) test all input parameters (for that you're encouraged to use functions from param.h since - * they would automatically produce informative output in case of error). If the shape can - * accept different number of parameters (UNDEF was set in array shape_opt) then also test - * the number of parameters. - * 3) if shape breaks any symmetry, corresponding variable should be set to FALSE. Do not set - * any of them to TRUE, as they can be set to FALSE by some other factors. - * symX, symY, symZ - symmetries of reflection over planes YZ, XZ, XY respectively. - * symR - symmetry of rotation for 90 degrees over the Z axis - * 4) initialize the following: - * sh_form_str - descriptive string, should contain %g - it would be replaced by box size along - * x-axis afterwards (in param.c). - * Either yx_ratio (preferably) or n_boxY. The former is a ratio of particle sizes along y and x - * axes. Initialize n_boxY directly only if it is not proportional to boxX, like in - * shape LINE above, since boxX is not initialized at this moment. If yx_ratio is not - * initialized, set it explicitly to UNDEF. - * Analogously either zx_ratio (preferably) or n_boxZ. - * Nmat_need - number of different domains in this shape (void is not included) - * volume_ratio - ratio of particle volume to (boxX)^3. Initialize it if it can be calculated - * analytically or set to UNDEF otherwise. This parameter is crucial if one wants - * to initialize computational grid from '-eq_rad' and '-dpl'. - * n_sizeX - absolute size of the particle, defined by shape; initialize only when relevant, - * e.g. for shapes such as 'axisymmetric'. - * all other auxiliary variables, which are used in shape generation (MakeParticle(), see - * below), should be defined in the beginning of this file. If you need temporary local - * variables (which are used only in this part of the code), either use 'tmp1'-'tmp3' or - * define your own (with more informative names) in the beginning of this function. - * Also (rarely) if the shape defines dimension of the computational grid or absolute size of - * the particle, correct values of box_det_sh and size_det_sh in the beginning of this function. - */ - - // initialize domain granulation - if (sh_granul) { - symX=symY=symZ=symR=FALSE; // no symmetry with granules - if (gr_mat+1>Nmat_need) - PrintError("Specified domain number to be granulated (%d) is larger than total number " - "of domains (%d) for the given shape (%s)",gr_mat+1,Nmat_need,shapename); - else Nmat_need++; - strcat(shapename,"_gran"); - } - // check if enough refractive indices or extra - if (NmatNmat_need) LogError(EC_INFO,ONE_POS, - "More refractive indices are given (%d) than actually used (%d)",Nmat,Nmat_need); - Nmat=Nmat_need; - - // check anisotropic refractive indices for symmetries - if (anisotropy) for (i=0;iboxX) - PrintError("Particle (boxX=%d) does not fit into specified boxX=%d",n_boxX,boxX); - } - // if shape is determined by ratios, calculate proposed grid sizes along y and z axes - if (yx_ratio!=UNDEF) n_boxY=(int)ceil(yx_ratio*boxX); - if (zx_ratio!=UNDEF) n_boxZ=(int)ceil(zx_ratio*boxX); - // set boxY and boxZ - if (boxY==UNDEF) { // assumed that boxY and boxZ are either both defined or both not defined - boxY=FitBox(n_boxY); - boxZ=FitBox(n_boxZ); - } - else { - temp=boxY; - if ((boxY=FitBox(boxY))!=temp) - LogError(EC_WARN,ONE_POS,"boxY has been adjusted from %i to %i",temp,boxY); - temp=boxZ; - if ((boxZ=FitBox(boxZ))!=temp) - LogError(EC_WARN,ONE_POS,"boxZ has been adjusted from %i to %i",temp,boxZ); - // this error is not duplicated in the log file since it does not yet exist - if (n_boxY>boxY || n_boxZ>boxZ) - PrintError("Particle (boxY,Z={%d,%d}) does not fit into specified boxY,Z={%d,%d}", - n_boxY,n_boxZ,boxY,boxZ); - } - // initialize number of dipoles - Ndip=boxX*((double)boxY)*boxZ; - // initialize maxiter; not very realistic - if (maxiter==UNDEF) maxiter=MIN(INT_MAX,3*Ndip); - // some old, not really logical heuristics for Ntheta, but better than constant value - if (nTheta==UNDEF) { - if (Ndip<1000) nTheta=91; - else if (Ndip<10000) nTheta=181; - else if (Ndip<100000) nTheta=361; - else nTheta=721; - } - // this limitation should be removed in the future - if (chp_type!=CHP_NONE && (!symR || scat_grid)) LogError(EC_ERROR,ONE_POS, - "Currently checkpoints can be used when internal fields are calculated only once," - "i.e. for a single incident polarization."); - Timing_Particle = GET_TIME() - tstart; + int n_boxX,n_boxY,n_boxZ,temp; /* new values for dimensions */ + double h_d,b_d,c_d,h2,b2,c2; + double yx_ratio,zx_ratio,tmp1,tmp2,tmp3; + double diskratio,aspectY,aspectZ; + double ad,ct,ct2; /* cos(theta0) and its square */ + TIME_TYPE tstart; + int Nmat_need,i; + int dpl_def_used; /* if default dpl is used for grid initialization */ + int box_det_sh; /* if boxX is determined by shape itself */ + /* TO ADD NEW SHAPE + Add here all intermediate variables, which are used only inside this function. You may as well + use 'tmp1'-'tmp3' variables defined above. */ + + tstart=GET_TIME(); + /* trivial now, but may be more cases in the future */ + box_det_sh=(shape==SH_READ); + /* check for redundancy of input data */ + if (dpl!=UNDEF && (sizeX!=UNDEF || a_eq!=UNDEF)) { + if (boxX!=UNDEF) PrintError("Extra information is given by setting '-dpl, '-grid', and "\ + "either '-size' or '-eq_rad'"); + if (box_det_sh) PrintError("Extra information is given by setting both '-dpl' and either "\ + "'-size' or '-eq_rad', while shape '%s' sets the size of the grid",shapename); + } + /* calculate default dpl - 10*sqrt(max(|m|)); + for anisotropic each component is considered separately */ + tmp2=0; + for (i=0;i0.25*(1-coat_ratio)*(1-coat_ratio)) + PrintErrorHelp("Inner sphere is not fully inside the outer"); + SPRINTZ(sh_form_str+strlen(sh_form_str), + "\n position of inner sphere center r/d= {%.10g,%.10g,%.10g}", + coat_x,coat_y,coat_z); + } + else coat_x=coat_y=coat_z=0; /* initialize default values */ + coat_r2=0.25*coat_ratio*coat_ratio; + volume_ratio=PI_OVER_SIX; + if (coat_x!=0) symX=symR=FALSE; + if (coat_y!=0) symY=symR=FALSE; + if (coat_z!=0) symZ=FALSE; + yx_ratio=zx_ratio=1; + Nmat_need=2; + } + else if(shape==SH_CYLINDER) { + diskratio=sh_pars[0]; + TestPositive(diskratio,"height to diameter ratio"); + SPRINTZ(sh_form_str,"cylinder; diameter(d):%%.10g, height h/d=%.10g",diskratio); + hdratio=diskratio/2; + volume_ratio=PI_OVER_FOUR*diskratio; + yx_ratio=1; + zx_ratio=diskratio; + Nmat_need=1; + } + else if (shape==SH_EGG) { + /* determined by equation: (a/r)^2=1+nu*cos(theta)-(1-eps)cos^2(theta) + or equivalently: a^2=r^2+nu*r*z-(1-eps)z^2. Parameters must be 00). Although it may overflow faster + for nu->eps, volume_ratio (below) will overflow even faster. It is used to shift coordinates + from the computational reference frame (centered at z0) to the natural one */ + egz0=-ad*egnu*(tmp1*tmp1*tmp2*tmp2)/(tmp1+tmp2); + /* (V/d^3)=(4*pi/3)*(a/d)^3*{[2(1-eps)-nu]/sqrt(eps+nu)+[2(1-eps)+nu]/sqrt(eps-nu)}/ + /[nu^2+4(1-eps)] */ + volume_ratio=FOUR_PI_OVER_THREE*ad2*ad*((tmp3-egnu)*tmp1+(tmp3+egnu)*tmp2)/(egnu*egnu+2*tmp3); + SPRINTZ(sh_form_str,"egg; diameter(d):%%.10g, epsilon=%.10g, nu=%.10g, a/d=%.10g", + egeps,egnu,ad); + Nmat_need=1; + yx_ratio=1; + zx_ratio=ad*(tmp1+tmp2); /* (a/d)*[1/sqrt(eps+nu)+1/sqrt(eps-nu)] */ + } + else if (shape==SH_ELLIPSOID) { + aspectY=sh_pars[0]; + TestPositive(aspectY,"aspect ratio y/x"); + aspectZ=sh_pars[1]; + TestPositive(aspectZ,"aspect ratio z/x"); + SPRINTZ(sh_form_str,"ellipsoid; size along x-axis:%%.10g, aspect ratios y/x=%.10g, z/x=%.10g", + aspectY,aspectZ); + if (aspectY!=1) symR=FALSE; + /* set inverse squares of ascpect ratios */ + invsqY=1/(aspectY*aspectY); + invsqZ=1/(aspectZ*aspectZ); + volume_ratio=PI_OVER_SIX*aspectY*aspectZ; + yx_ratio=aspectY; + zx_ratio=aspectZ; + Nmat_need=1; + } + else if (shape==SH_LINE) { + STRCPYZ(sh_form_str,"line; length:%g"); + symY=symZ=symR=FALSE; + n_boxY=n_boxZ=jagged; + yx_ratio=zx_ratio=UNDEF; + volume_ratio=UNDEF; + Nmat_need=1; + } + else if(shape==SH_RBC) { + /* three-parameter shape; developed by K.A.Semyanov,P.A.Tarasov,P.A.Avrorov + based on work by P.W.Kuchel and E.D.Fackerell, "Parametric-equation representation + of biconcave erythrocytes," Bulletin of Mathematical Biology 61, 209-220 (1999). + ro^4+2S*ro^2*z^2+z^4+P*ro^2+Q*z^2+R=0, ro^2=x^2+y^2, P,Q,R,S are determined by d,h,b,c given + in the command line */ + h_d=sh_pars[0]; + TestPositive(h_d,"ratio of maximum width to diameter"); + b_d=sh_pars[1]; + TestNonNegative(b_d,"ratio of minimum width to diameter"); + if (h_d<=b_d) PrintErrorHelp("given RBC is not biconcave; maximum width is in the center"); + c_d=sh_pars[2]; + TestRangeII(c_d,"relative diameter of maximum width",0,1); + SPRINTZ(sh_form_str, + "red blood cell; diameter(d):%%.10g, maximum and minimum width h/d=%.10g, b/d=%.10g\n"\ + " diameter of maximum width c/d=%.10g",h_d,b_d,c_d); + /* calculate shape parameters */ + h2=h_d*h_d; + b2=b_d*b_d; + c2=c_d*c_d; + /* P={(b/d)^2*[c^4/(h^2-b^2)-h^2]-d^2}/4; Q=(d/b)^2*(P+d^2/4)-b^2/4; R=-d^2*(P+d^2/4)/4; + S=-(2P+c^2)/h^2; here P,Q,R,S are made dimensionless dividing by respective powers of d + Calculation is performed so that Q is well defined even for b=0 */ + tmp1=((c2*c2/(h2-b2))-h2)/4; + P=b2*tmp1-0.25; + Q=tmp1-(b2/4); + R=-b2*tmp1/4; + S=-(2*P+c2)/h2; + yx_ratio=1; + zx_ratio=h_d; + volume_ratio=UNDEF; + Nmat_need=1; + } + else if (shape==SH_READ) { + SPRINTZ(sh_form_str,"specified by file %s; size along x-axis:%%.10g",aggregate_file); + symX=symY=symZ=symR=FALSE; /* input file is assumed assymetric */ + InitDipFile(aggregate_file,&n_boxX,&n_boxY,&n_boxZ,&Nmat_need); + yx_ratio=zx_ratio=UNDEF; + volume_ratio=UNDEF; + } + else if (shape==SH_SPHERE) { + STRCPYZ(sh_form_str,"sphere; diameter:%.10g"); + volume_ratio=PI_OVER_SIX; + yx_ratio=zx_ratio=1; + Nmat_need=1; + } + else if (shape==SH_SPHEREBOX) { + coat_ratio=sh_pars[0]; + TestRangeII(coat_ratio,"sphere diameter/cube edge ratio",0,1); + SPRINTZ(sh_form_str, + "sphere in cube; size of cube edge(a):%%.10g, diameter of sphere d/a=%.10g",coat_ratio); + coat_r2=0.25*coat_ratio*coat_ratio; + yx_ratio=zx_ratio=1; + volume_ratio=1; + Nmat_need=2; + } + /* TO ADD NEW SHAPE + add an option here (in the end of 'else if' sequence). Identifier ('SH_...') should be + defined in const.h. The option should + 1) save all the input parameters from array 'sh_pars' to local variables + (defined in the beginning of this source files) + 2) test all input parameters (for that you're encouraged to use functions from param.h since + they would automatically produce informative output in case of error). If the shape can + accept different number of parameters (UNDEF was set in array shape_opt) then also test the + number of parameters. + 3) if shape breaks any symmetry, corresponding variable should be set to FALSE. Do not set + any of them to TRUE, as they can be set to FALSE by some other factors. + symX, symY, symZ - symmetries of reflection over planes YZ, XZ, XY respectively. + symR - symmetry of rotation for 90 degrees over the Z axis + 4) initialize the following: + sh_form_str - descriptive string, should contain %g - it would be replaced by box size along + x-axis afterwards (in param.c). + Either yx_ratio (preferably) or n_boxY. The former is a ratio of particle sizes along y and x + axes. Initialize n_boxY directly only if it is not proportional to boxX, like in + shape LINE above, since boxX is not initialized at this moment. If yx_ratio is not + initialized, set it explicitely to UNDEF. + Analogously either zx_ratio (preferably) or n_boxZ. + Nmat_need - number of different domains in this shape (void is not included) + volume_ratio - ratio of particle volume to (boxX)^3. Initialize it if it can be calculated + analytically or set to UNDEF otherwise. This parameter is crucial if one wants + to initialize computational grid from '-eq_rad' and '-dpl'. + all other auxiliary variables, which are used in shape generation (MakeParticle(), see below), + should be defined in the beginning of this file. If you need temporary local variables + (which are used only in this part of the code), either use 'tmp1'-'tmp3' or define your + own (with more informative names) in the beginning of this function. */ + + /* initialize domain granulation */ + if (sh_granul) { + symX=symY=symZ=symR=FALSE; /* no symmetry with granules */ + if (gr_mat+1>Nmat_need) PrintError("Specified domain number to be granulated (%d) is larger "\ + "than total number of domains (%d) for the given shape (%s)",gr_mat+1,Nmat_need,shapename); + else Nmat_need++; + strcat(shapename,"_gran"); + } + /* check if enough refr. indices or extra */ + if (NmatNmat_need) LogError(EC_INFO,ONE_POS, + "More refractive indices are given (%d) than actually used (%d)",Nmat,Nmat_need); + Nmat=Nmat_need; + + /* ckeck anisotropic refractive indices for symmetries */ + if (anisotropy) for (i=0;iboxX) + PrintError("Particle (boxX=%d) does not fit into specified boxX=%d",n_boxX,boxX); + } + /* if shape is determined by ratios, calculate proposed grid sizes along y and z axes */ + if (yx_ratio!=UNDEF) n_boxY=(int)ceil(yx_ratio*boxX); + if (zx_ratio!=UNDEF) n_boxZ=(int)ceil(zx_ratio*boxX); + /* set boxY and boxZ */ + if (boxY==UNDEF) { /* assumed that boxY and boxZ are either both defined or both not defined */ + boxY=FitBox(n_boxY); + boxZ=FitBox(n_boxZ); + } + else { + temp=boxY; + if ((boxY=FitBox(boxY))!=temp) + LogError(EC_WARN,ONE_POS,"boxY has been adjusted from %i to %i",temp,boxY); + temp=boxZ; + if ((boxZ=FitBox(boxZ))!=temp) + LogError(EC_WARN,ONE_POS,"boxZ has been adjusted from %i to %i",temp,boxZ); + /* this error is not duplicated in the logfile since it does not yet exist */ + if (n_boxY>boxY || n_boxZ>boxZ) + PrintError("Particle (boxY,Z={%d,%d}) does not fit into specified boxY,Z={%d,%d}", + n_boxY,n_boxZ,boxY,boxZ); + } + /* initialize number of dipoles */ + Ndip=boxX*((double)boxY)*boxZ; + /* initialize maxiter; not very realistic */ + if (maxiter==UNDEF) maxiter=MIN(INT_MAX,3*Ndip); + /* some old, not really logical heuristics for Ntheta, but better than constant value */ + if (nTheta==UNDEF) { + if (Ndip<1000) nTheta=91; + else if (Ndip<10000) nTheta=181; + else if (Ndip<100000) nTheta=361; + else nTheta=721; + } + /* this limitation should be removed in the future */ + if (chp_type!=CHP_NONE && (!symR || scat_grid)) LogError(EC_ERROR,ONE_POS, + "Currently checkpoints can be used when internal fields are calculated only once,"\ + "i.e. for a single incident polarization."); + Timing_Particle = GET_TIME() - tstart; } -//========================================================== +/*==========================================================*/ void MakeParticle(void) -// creates a particle; initializes all dipoles counts, dpl, gridspace + /* creates a particle; initializes all dipoles counts, dpl, gridspace */ { - int i,j,k,ns; - size_t index,dip,nlocalRows_tmp; - double tmp1,tmp2,tmp3; - double xr,yr,zr,xcoat,ycoat,zcoat,r2,z2,zshift; - double jcX,jcY,jcZ; // center for jagged - int local_z0_unif; // should be global or semi-global - int largerZ,smallerZ; // number of larger and smaller z in intersections with contours - int xj,yj,zj; - int mat; - unsigned short us_tmp; - TIME_TYPE tstart,tgran; - /* TO ADD NEW SHAPE - * Add here all intermediate variables, which are used only inside this function. You may as - * well use 'tmp1'-'tmp3' variables defined above. - */ - - tstart=GET_TIME(); - - index=0; - // assumed that box's are even - jcX=jcY=jcZ=jagged/2.0; - cX=(boxX-1)/2.0; - cY=(boxY-1)/2.0; - cZ=(boxZ-1)/2.0; - nlocalRows_tmp=MultOverflow(3,local_Ndip,ALL_POS,"nlocalRows_tmp"); - /* allocate temporary memory; even if prognosis, since they are needed for exact estimation - * they will be reallocated afterwards (when nlocalRows is known). - */ - MALLOC_VECTOR(material_tmp,uchar,local_Ndip,ALL); - MALLOC_VECTOR(DipoleCoord_tmp,double,nlocalRows_tmp,ALL); - MALLOC_VECTOR(position_tmp,ushort,nlocalRows_tmp,ALL); - - for(k=local_z0;k=contRoSqMin && r2<=0.25) { - largerZ=smallerZ=0; - contCurRo=sqrt(r2); - contCurZ=zr; - for (ns=0;ns=contSegRoMin[ns] && contCurRo<=contSegRoMax[ns]) - CheckContourSegment(contSeg+ns) ? largerZ++ : smallerZ++; - // check for consistency; if the code is perfect, this is not needed - if (!IS_EVEN(largerZ+smallerZ)) LogError(EC_ERROR,ALL_POS, - "Point (ro,z)=(%g,%g) produced weird result when checking whether it lies " - "inside the contour. Larger than z %d intersections, smaller - %d.", - contCurRo,contCurZ,largerZ,smallerZ); - if (!IS_EVEN(largerZ)) mat=0; - } - } - else if (shape==SH_BOX) { - if (fabs(yr)<=haspY && fabs(zr)<=haspZ) mat=0; - } - else if (shape==SH_CAPSULE) { - r2=xr*xr+yr*yr; - if (r2<=0.25) { - tmp1=fabs(zr)-hdratio; - if (tmp1<=0 || tmp1*tmp1+r2<=0.25) mat=0; - } - } - else if (shape==SH_COATED) { - if (xr*xr+yr*yr+zr*zr<=0.25) { // first test to skip some dipoles immediately) - xcoat=xr-coat_x; - ycoat=yr-coat_y; - zcoat=zr-coat_z; - if (xcoat*xcoat+ycoat*ycoat+zcoat*zcoat<=coat_r2) mat=1; - else mat=0; - } - } - else if (shape==SH_CYLINDER) { - if(xr*xr+yr*yr<=0.25 && fabs(zr)<=hdratio) mat=0; - } - else if (shape==SH_EGG) { - r2=xr*xr+yr*yr; - zshift=zr+egz0; - z2=zshift*zshift; - if (r2+egeps*z2+egnu*zshift*sqrt(r2+z2)<=ad2) mat=0; - } - else if (shape==SH_ELLIPSOID) { - if (xr*xr+yr*yr*invsqY+zr*zr*invsqZ<=0.25) mat=0; - } - else if (shape==SH_LINE) { - if (yj==0 && zj==0) mat=0; - } - else if (shape==SH_RBC) { - r2=xr*xr+yr*yr; - z2=zr*zr; - if (r2*r2+2*S*r2*z2+z2*z2+P*r2+Q*z2+R<=0) mat=0; - } - else if (shape==SH_SPHERE) { - if (xr*xr+yr*yr+zr*zr<=0.25) mat=0; - } - else if (shape==SH_SPHEREBOX) { - if (xr*xr+yr*yr+zr*zr<=coat_r2) mat=1; - else if (fabs(yr)<=0.5 && fabs(zr)<=0.5) mat=0; - } - /* TO ADD NEW SHAPE - * add an option here (in the end of 'else if' sequence). Identifier ('SH_...') - * should be defined in const.h. This option should set 'mat' - index of domain for - * a point, specified by {xr,yr,zr} - coordinates divided by grid size along X (xr - * from -0.5 to 0.5, others - depending on aspect ratios). C array indexing used: - * mat=0 - first domain, etc. If point corresponds to void, do not set 'mat'. If you - * need temporary local variables (which are used only in this part of the code), - * either use 'tmp1'-'tmp3' or define your own (with more informative names) in the - * beginning of this function. - */ - - position_tmp[3*index]=(unsigned short)i; - position_tmp[3*index+1]=(unsigned short)j; - position_tmp[3*index+2]=(unsigned short)k; - // afterwards multiplied by gridspace - DipoleCoord_tmp[3*index]=i-cX; - DipoleCoord_tmp[3*index+1]=j-cY; - DipoleCoord_tmp[3*index+2]=k-cZ; - material_tmp[index]=(unsigned char)mat; - index++; - } // End box loop - if (shape==SH_READ) ReadDipFile(shape_fname); - // initialization of mat_count and dipoles counts - for(i=0;i<=Nmat;i++) mat_count[i]=0; - for(dip=0;dip $@.$$$$; \ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ rm -f $@.$$$$ diff --git a/src/matvec.c b/src/matvec.c index 7f3dddf9..01dfd8d9 100644 --- a/src/matvec.c +++ b/src/matvec.c @@ -1,11 +1,11 @@ /* FILE: matvec.c * AUTH: Maxim Yurkin * DESCR: calculate local matrix vector product of decomposed interaction - * matrix with r_k or p_k, using a FFT based convolution algorithm + * matrix with rk or pk, using a FFT based convolution algorithm * * Previous version by Michel Grimminck * - * Copyright (C) 2006-2008 University of Amsterdam + * Copyright (C) 2006-2007 University of Amsterdam * This code is covered by the GNU General Public License. */ #include @@ -20,331 +20,332 @@ #include "function.h" #include "io.h" -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and initialized in fft.c +/* defined and initialized in fft.c */ extern const doublecomplex *Dmatrix; extern doublecomplex *Xmatrix,*slices,*slices_tr; extern const size_t DsizeY,DsizeZ,DsizeYZ; -//============================================================ +/*============================================================*/ INLINE size_t IndexSliceZY(const size_t y,const size_t z) { - return (z*gridY+y); + return (z*gridY+y); } -//============================================================ +/*============================================================*/ INLINE size_t IndexSliceYZ(const size_t y,const size_t z) { - return(y*gridZ+z); + return(y*gridZ+z); } -//============================================================ +/*============================================================*/ INLINE size_t IndexGarbledX(const size_t x,const size_t y,const size_t z) { #ifdef PARALLEL - return(((z%local_Nz)*smallY+y)*gridX+(z/local_Nz)*local_Nx+x%local_Nx); + return(((z%local_Nz)*smallY+y)*gridX+(z/local_Nz)*local_Nx+x%local_Nx); #else - return((z*smallY+y)*gridX+x); + return((z*smallY+y)*gridX+x); #endif } -//============================================================ +/*============================================================*/ INLINE size_t IndexXmatrix(const size_t x,const size_t y,const size_t z) { - return((z*smallY+y)*gridX+x); + return((z*smallY+y)*gridX+x); } -//============================================================ +/*============================================================*/ INLINE size_t IndexDmatrix_mv(size_t x,size_t y,size_t z,const int transposed) { - if (transposed) { // used only for G_SO - if (x>0) x=gridX-x; - if (y>0) y=gridY-y; - if (z>0) z=gridZ-z; - } - else { - if (y>=DsizeY) y=gridY-y; - if (z>=DsizeZ) z=gridZ-z; - } + if (transposed) { /* used only for G_SO */ + if (x>0) x=gridX-x; + if (y>0) y=gridY-y; + if (z>0) z=gridZ-z; + } + else { + if (y>=DsizeY) y=gridY-y; + if (z>=DsizeZ) z=gridZ-z; + } - return(NDCOMP*(x*DsizeYZ+z*DsizeY+y)); + return(NDCOMP*(x*DsizeYZ+z*DsizeY+y)); } -//============================================================ +/*============================================================*/ -void MatVec (doublecomplex *argvec, // the argument vector - doublecomplex *resultvec, // the result vector - double *inprod, // the resulting inner product - const int her) // 0 for non-Hermitian, 1 for Hermitian -/* This function implements both MatVec_nim and MatVecAndInp_nim. The difference is that when we - * want to calculate the inner product as well, we pass 'inprod' as a non-NULL pointer. if 'inprod' - * is NULL, we don't calculate it. 'argvec' always remains unchanged afterwards, however it is not - * strictly const - some manipulations may occur during the execution. - */ +void MatVec (doublecomplex *argvec, /* the argument vector */ + doublecomplex *resultvec, /* the result vector */ + double *inprod, /* the result inner product */ + const int her) /* 0 for non-hermitic, 1 for hermetic */ +/* This function implements both MatVec_nim and MatVecAndInp_nim. + the difference is that when we want to calculate the inproduct + as well, we pass 'inprod' as a non-null pointer. if 'inprod' is + a NULL, we don't calculate it. + argvec allways remains unchanged afterwards, however it is not + strictly const - some manipulations may occur during the execution */ { - size_t i,j; - doublecomplex fmat[6],xv[3],yv[3]; - doublecomplex temp; - size_t index,x,y,z,Xcomp; - int ipr; - unsigned char mat; - int transposed; - size_t boxY_st=boxY,boxZ_st=boxZ; // copies with different type + size_t i,j; + doublecomplex fmat[6],xv[3],yv[3]; + doublecomplex temp; + size_t index,x,y,z,Xcomp; + int ipr; + unsigned char mat; + int transposed; + size_t boxY_st=boxY,boxZ_st=boxZ; /* copies with different type */ #ifdef PRECISE_TIMING - SYSTEM_TIME tvp[18]; - SYSTEM_TIME Timing_FFTXf,Timing_FFTYf,Timing_FFTZf,Timing_FFTXb,Timing_FFTYb,Timing_FFTZb, - Timing_Mult1,Timing_Mult2,Timing_Mult3,Timing_Mult4,Timing_Mult5, - Timing_BTf,Timing_BTb,Timing_TYZf,Timing_TYZb,Timing_ipr; - double t_FFTXf,t_FFTYf,t_FFTZf,t_FFTXb,t_FFTYb,t_FFTZb, - t_Mult1,t_Mult2,t_Mult3,t_Mult4,t_Mult5,t_ipr, - t_BTf,t_BTb,t_TYZf,t_TYZb,t_Arithm,t_FFT,t_Comm; + SYSTEM_TIME tvp[18]; + SYSTEM_TIME Timing_FFTXf,Timing_FFTYf,Timing_FFTZf,Timing_FFTXb,Timing_FFTYb,Timing_FFTZb, + Timing_Mult1,Timing_Mult2,Timing_Mult3,Timing_Mult4,Timing_Mult5, + Timing_BTf,Timing_BTb,Timing_TYZf,Timing_TYZb,Timing_ipr; + double t_FFTXf,t_FFTYf,t_FFTZf,t_FFTXb,t_FFTYb,t_FFTZb, + t_Mult1,t_Mult2,t_Mult3,t_Mult4,t_Mult5,t_ipr, + t_BTf,t_BTb,t_TYZf,t_TYZb,t_Arithm,t_FFT,t_Comm; #endif -/* A = I + S.D.S - * S = sqrt(C) - * A.x = x + S.D.(S.x) - * A(H).x = x + (S(T).D(T).S(T).x(*))(*) - * C,S - diagonal => symmetric - * (!! will change if tensor (non-diagonal) polarizability is used !!) - * D - symmetric (except for G_SO) - * - * D.x=F(-1)(F(D).F(X)) - * F(D) is just a vector - * - * G_SO: F(D(T)) (k) = F(D) (-k) - * k - vector index - * - * For (her) three additional operations of nConj are used. Should not be a problem, - * but can be avoided by a more complex code. - */ + /* A = I + S.D.S + * S = sqrt(C) + * A.x = x + S.D.(S.x) + * A(H).x = x + (S(T).D(T).S(T).x(*))(*) + * C,S - diagonal => symmetric + * (!! will change if tensor (non-diagonal) polarizability is used !!) + * D - symmetric (except for G_SO) + * + * D.x=F(-1)(F(D).F(X)) + * F(D) is just a vector + * + * G_SO: F(D(T)) (k) = F(D) (-k) + * k - vector index + * + * For (her) three additional operations of nConj are used. Should not be a problem, + * but can be avoided by a more complex code. + */ - transposed=(!reduced_FFT) && her; - if (inprod) ipr=TRUE; - else ipr=FALSE; + transposed=(!reduced_FFT) && her; + if (inprod) ipr=TRUE; + else ipr=FALSE; #ifdef PRECISE_TIMING - InitTime(&Timing_FFTYf); - InitTime(&Timing_FFTZf); - InitTime(&Timing_FFTYb); - InitTime(&Timing_FFTZb); - InitTime(&Timing_Mult2); - InitTime(&Timing_Mult3); - InitTime(&Timing_Mult4); - InitTime(&Timing_TYZf); - InitTime(&Timing_TYZb); - GetTime(tvp); + InitTime(&Timing_FFTYf); + InitTime(&Timing_FFTZf); + InitTime(&Timing_FFTYb); + InitTime(&Timing_FFTZb); + InitTime(&Timing_Mult2); + InitTime(&Timing_Mult3); + InitTime(&Timing_Mult4); + InitTime(&Timing_TYZf); + InitTime(&Timing_TYZb); + GetTime(tvp); #endif - // FFT_matvec code - if (ipr) *inprod = 0.0; + /* FFT_matvec code */ + if (ipr) *inprod = 0.0; - // fill Xmatrix with 0.0 - for (i=0;i<3*local_Nsmall;i++) Xmatrix[i][RE]=Xmatrix[i][IM]=0.0; + /* fill Xmatrix with 0.0 */ + for (i=0;i<3*local_Nsmall;i++) Xmatrix[i][RE]=Xmatrix[i][IM]=0.0; - // transform from coordinates to grid and multiply with coupling constant - if (her) nConj(argvec); // conjugated back afterwards + /* transform from coordinates to grid and multiply with coupling constant */ + if (her) nConj(argvec); /* conjugated back afterwards */ - for (i=0;ismallY) { - cInvSign(fmat[1]); // fmat[1]*=-1 - if (z>smallZ) cInvSign(fmat[2]); // fmat[2]*=-1 - else cInvSign(fmat[4]); // fmat[4]*=-1 - } - else if (z>smallZ) { - cInvSign(fmat[2]); // fmat[2]*=-1 - cInvSign(fmat[4]); // fmat[4]*=-1 - } - } - cSymMatrVec(fmat,xv,yv); // yv=fmat*xv - for (Xcomp=0;Xcomp<3;Xcomp++) - cEqual(yv[Xcomp],slices_tr[i+Xcomp*gridYZ]); - } + j=IndexDmatrix_mv(x-local_x0,y,z,transposed); + memcpy(fmat,Dmatrix[j],6*sizeof(doublecomplex)); + if (reduced_FFT) { + if (y>smallY) { + cInvSign(fmat[1]); /* fmat[1]*=-1 */ + if (z>smallZ) cInvSign(fmat[2]); /* fmat[2]*=-1 */ + else cInvSign(fmat[4]); /* fmat[4]*=-1 */ + } + else if (z>smallZ) { + cInvSign(fmat[2]); /* fmat[2]*=-1 */ + cInvSign(fmat[4]); /* fmat[4]*=-1 */ + } + } + cSymMatrVec(fmat,xv,yv); /* yv=fmat*xv */ + for (Xcomp=0;Xcomp<3;Xcomp++) + cEqual(yv[Xcomp],slices_tr[i+Xcomp*gridYZ]); + } #ifdef PRECISE_TIMING - GetTime(tvp+9); - ElapsedInc(tvp+8,tvp+9,&Timing_Mult3); + GetTime(tvp+9); + ElapsedInc(tvp+8,tvp+9,&Timing_Mult3); #endif - // fft_invY&Z - fftY(FFT_BACKWARD); // fftY slices_tr + /* fft_invY&Z */ + fftY(FFT_BACKWARD); /* fftY slices_tr */ #ifdef PRECISE_TIMING - GetTime(tvp+10); - ElapsedInc(tvp+9,tvp+10,&Timing_FFTYb); + GetTime(tvp+10); + ElapsedInc(tvp+9,tvp+10,&Timing_FFTYb); #endif - TransposeYZ(FFT_BACKWARD); + TransposeYZ(FFT_BACKWARD); #ifdef PRECISE_TIMING - GetTime(tvp+11); - ElapsedInc(tvp+10,tvp+11,&Timing_TYZb); + GetTime(tvp+11); + ElapsedInc(tvp+10,tvp+11,&Timing_TYZb); #endif - fftZ(FFT_BACKWARD); // fftZ slices + fftZ(FFT_BACKWARD); /* fftZ slices */ #ifdef PRECISE_TIMING - GetTime(tvp+12); - ElapsedInc(tvp+11,tvp+12,&Timing_FFTZb); + GetTime(tvp+12); + ElapsedInc(tvp+11,tvp+12,&Timing_FFTZb); #endif - // copy slice back to Xmatrix - for(y=0;y @@ -18,248 +18,236 @@ #include "const.h" #ifdef FFTW3 -# include // for fftw_malloc +# include /* for fftw_malloc */ #endif -// common error check +/* common error check */ #define MALLOC_ERROR LogError(EC_ERROR,who,fname,line,"Could not malloc %s",name) #define CHECK_NULL(size,v) if ((size)!=0 && (v)==NULL) MALLOC_ERROR #define CHECK_SIZE(size,type) if ((SIZE_MAX/sizeof(type))<(size)) MALLOC_ERROR #define IF_FREE(v) if((v)!=NULL) free(v) #define OVERFLOW LogError(EC_ERROR,who,fname,line,"Integer overflow in '%s'",name); -//============================================================ +/*============================================================*/ void CheckOverflow(const double size,OTHER_ARGUMENTS) -// checks if size can fit into size_t type, otherwise overflow will happen before memory allocation + /* checks if size can fit into size_t type, + otherwise overflow will happen before memory allocation */ { - if (size>SIZE_MAX) OVERFLOW; + if (size>SIZE_MAX) OVERFLOW; } -//============================================================ +/*============================================================*/ size_t MultOverflow(const size_t a,const size_t b,OTHER_ARGUMENTS) -// multiplies two integers and checks for overflow + /* multiplies two integers and checks for overflow */ { - if ((SIZE_MAX/a)=nl + /* allocates double vector with indices from nl to nh; all arguments must be non-negative; + and nh>=nl */ { - double *v; - size_t size; - - if (nh=nrl; nch>=ncl - */ -{ - register size_t i; - size_t rows,cols; - int **m; - - if (nrh=nrl; nch>=ncl */ +{ + register size_t i; + size_t rows,cols; + int **m; + + if (nrh=nrl;i--) IF_FREE(m[i]+ncl); - IF_FREE(m+nrl); + for (i=nrh;i>=nrl;i--) IF_FREE(m[i]+ncl); + IF_FREE(m+nrl); } -//============================================================ +/*============================================================*/ void Free_general(void *v) -// frees general vector; kept in a special function for future development + /* frees general vector; kept in a special function for future development */ { - IF_FREE(v); + IF_FREE(v); } diff --git a/src/memory.h b/src/memory.h index f3aee821..6f17959a 100644 --- a/src/memory.h +++ b/src/memory.h @@ -4,22 +4,22 @@ * memory allocation and freeing * also includes overflows checks * - * Copyright (C) 2006-2008 University of Amsterdam + * Copyright (C) 2006-2007 University of Amsterdam * This code is covered by the GNU General Public License. */ #ifndef __memory_h #define __memory_h -#include // for size_t -#include "function.h" // for function attributes +#include /* for size_t */ +#include "function.h" /* for function attributes */ #define MBYTE 1048576.0 -// for conciseness +/* for conciseness */ #define OTHER_ARGUMENTS const int who,const char *fname,const int line,const char *name void CheckOverflow(double size,OTHER_ARGUMENTS); size_t MultOverflow(size_t a,size_t b,OTHER_ARGUMENTS); -// allocate +/* allocate */ doublecomplex *complexVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC; double **doubleMatrix(size_t rows,size_t cols,OTHER_ARGUMENTS) ATT_MALLOC; double *doubleVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC; @@ -30,20 +30,17 @@ unsigned short *ushortVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC; char *charVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC; unsigned char *ucharVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC; void *voidVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC; -// reallocate -double *doubleRealloc(double *ptr,const size_t size,OTHER_ARGUMENTS) ATT_MALLOC; -// free +/* free */ void Free_cVector(doublecomplex *v); void Free_dMatrix(double **m,size_t rows); void Free_dVector2(double *v,size_t nl); void Free_iMatrix(int **m,size_t nrl,size_t nrh,size_t ncl); void Free_general(void *v); -// macros to use for allocation and reallocation +/* macros to use for allocation */ #define MALLOC_VECTOR(vec,type,size,who) vec=type##Vector(size,who,POSIT,#vec) #define MALLOC_DVECTOR2(vec,nl,nh,who) vec=doubleVector2(nl,nh,who,POSIT,#vec) #define MALLOC_DMATRIX(vec,rows,cols,who) vec=doubleMatrix(rows,cols,who,POSIT,#vec) #define MALLOC_IMATRIX(vec,nrl,nrh,ncl,nch,who) vec=intMatrix(nrl,nrh,ncl,nch,who,POSIT,#vec) -#define REALLOC_DVECTOR(vec,size,who) vec=doubleRealloc(vec,size,who,POSIT,#vec) -#endif //__memory_h +#endif /*__memory_h*/ diff --git a/src/os.h b/src/os.h index 6de078eb..2e53d89e 100644 --- a/src/os.h +++ b/src/os.h @@ -2,23 +2,21 @@ * AUTH: Maxim Yurkin * DESCR: determines which operation system is used * - * Copyright (C) 2006-2008 University of Amsterdam + * Copyright (C) 2006-2007 University of Amsterdam * This code is covered by the GNU General Public License. */ #ifndef __os_h #define __os_h -/* If neither WINDOWS nor POSIX is found, some parts of the program, such as precise timing and - * file locking, will fail to compile - */ +/* If neither WINDOWS nor POSIX is found, some parts of the program, such as Precise Timing and + File Locking, will fail to compile */ #ifdef _WIN32 -# define WINDOWS -# include // all windows functions need this +# define WINDOWS +# include /* all windows functions need this */ /* this list is not exhaustive. gcc always defines __POSIX__ on POSIX-compliant systems, - * however other compilers do not necessarily do the same. You may define it manually - */ + however other compilers do not necessarily do the same. You may define it manually */ #elif defined(__POSIX__) || defined(unix) || defined (__unix) || defined (__unix__) -# define POSIX +# define POSIX #endif -#endif // __os_h +#endif /*__os_h*/ diff --git a/src/param.c b/src/param.c index ed5b7bad..a7493bb6 100644 --- a/src/param.c +++ b/src/param.c @@ -25,189 +25,181 @@ #include "function.h" #include "parbas.h" -// definitions for file locking +/* definitions for file locking */ #ifdef USE_LOCK -# ifdef WINDOWS -# define FILEHANDLE HANDLE -# elif defined(POSIX) -# include -# include -# ifdef LOCK_FOR_NFS -# include // for error handling of fcntl call -# endif -# define FILEHANDLE int -# else -# error *** Unknown operation system. Creation of lock files is not supported. *** -# endif -# define LOCK_WAIT 1 // in seconds -# define MAX_LOCK_WAIT_CYCLES 60 +# ifdef WINDOWS +# define FILEHANDLE HANDLE +# elif defined(POSIX) +# include +# include +# ifdef LOCK_FOR_NFS +# include /* for error handling of fcntl call */ +# endif +# define FILEHANDLE int +# else +# error *** Unknown operation system. Creation of lock files is not supported. *** +# endif +# define LOCK_WAIT 1 /* in seconds */ +# define MAX_LOCK_WAIT_CYCLES 60 #else -# define FILEHANDLE int +# define FILEHANDLE int #endif -// GLOBAL VARIABLES +/* GLOBAL VARIABLES */ -opt_index opt; // main option index +opt_index opt; /* main option index */ -// SEMI-GLOBAL VARIABLES +/* SEMI-GLOBAL VARIABLES */ -// defined and initialized in crosssec.c +/* defined and initialized in crosssec.c */ extern const char avg_string[]; -// defined and initialized in GenerateB.c +/* defined and initialized in GenerateB.c */ extern const char beam_descr[]; -// defined and initialized in make_particle.c +/* defined and initialized in make_particle.c */ extern const int volcor_used; extern const char sh_form_str[]; extern const int gr_N; extern const double gr_vf_real; extern const double mat_count[]; -// used in CalculateE.c -int store_int_field; // save full internal fields to text file -int store_dip_pol; // save dipole polarizations to text file -int store_beam; // save incident beam to file -int store_scat_grid; // Store the scattered field for grid of angles -int calc_Cext; // Calculate the extinction cross-section - always do -int calc_Cabs; // Calculate the absorption cross-section - always do -int calc_Csca; // Calculate the scattering cross-section by integration -int calc_vec; // Calculate the unnormalized asymmetry-parameter -int calc_asym; // Calculate the asymmetry-parameter -int calc_mat_force; // Calculate the scattering force by matrix-evaluation -int store_force; // Write radiation pressure per dipole to file +/* used in CalculateE.c */ +int store_int_field; /* save full internal fields to text file */ +int store_dip_pol; /* save dipole polarizations to text file */ +int store_beam; /* save incident beam to file */ +int store_scat_grid; /* Store the scattered field for grid of angles */ +int calc_Cext; /* Calculate the extinction cross-section - allways do */ +int calc_Cabs; /* Calculate the absorption cross-section - allways do */ +int calc_Csca; /* Calculate the scattering cross-section by integration */ +int calc_vec; /* Calculate the unnormalized asymmetry-parameter */ +int calc_asym; /* Calculate the asymmetry-parameter */ +int calc_mat_force; /* Calculate the scattering force by matrix-evaluation */ +int store_force; /* Write radiation pressure per dipole to file */ int phi_int_type; /* type of phi integration (each bit determines - * whether to calculate with different multipliers) - */ -// used in calculator.c -int avg_inc_pol; // whether to average CC over incident polarization -char alldir_parms[MAX_FNAME]; // name of file with alldir parameters -char scat_grid_parms[MAX_FNAME]; // name of file with parameters of scattering grid -// used in crosssec.c -double prop_0[3]; // initial incident direction (in laboratory reference frame) -double incPolX_0[3],incPolY_0[3]; // initial incident polarizations (in lab RF) -int ScatRelation; // type of formulae for scattering quantities -// used in GenerateB.c + whether to calculate with different multipliers) */ +/* used in calculator.c */ +int avg_inc_pol; /* whether to average CC over incident polarization */ +char alldir_parms[MAX_FNAME]; /* name of file with alldir parameters */ +char scat_grid_parms[MAX_FNAME]; /* name of file with parameters of scattering grid */ +/* used in crosssec.c */ +double prop_0[3]; /* initial incident direction (in laboratory reference frame) */ +double incPolX_0[3],incPolY_0[3]; /* initial incident polarizations (in lab RF)*/ +int ScatRelation; /* type of formulae for scattering quantities */ +/* used in GenerateB.c */ int beam_Npars; -double beam_pars[MAX_N_BEAM_PARMS]; // beam parameters -// used in io.c -char logname[MAX_FNAME]=""; // name of logfile -// used in iterative.c -double eps; // relative error to reach -// used in make_particle.c -int shape; // particle shape definition -int sh_Npars; // number of shape parameters -double sh_pars[MAX_N_SH_PARMS]; // storage for shape parameters -int sym_type; // how to treat particle symmetries -double sizeX; // size of particle along x-axis -double dpl; // number of dipoles per lambda (wavelength) -double lambda; // incident wavelength (in vacuum) -int jagged; // size of big dipoles, used to construct a particle -char shape_fname[MAX_FNAME]; // name of file, defining the shape -char save_geom_fname[MAX_FNAME]; // geometry file name to save dipole configuration -char shapename[MAX_LINE]; // name of the used shape -int volcor; // whether to use volume correction -int save_geom; // whether to save dipole configuration in .geom file -opt_index opt_sh; // option index of shape option used -double gr_vf; // granules volume fraction -double gr_d; // granules diameter -int gr_mat; // domain number to granulate -double a_eq; // volume-equivalent radius of the particle -int sg_format; // format for saving geometry files -int store_grans; // whether to save granule positions to file - -// LOCAL VARIABLES - -static char run_name[MAX_WORD]; // first part of the dir name ('run' or 'test') -static char avg_parms[MAX_FNAME]; // name of file with orientation averaging parameters -static char *exename; // name of executable (adda, adda.exe, adda_mpi,...) -static int Nmat_given; // number of refractive indices given in the command line -// structure definitions +double beam_pars[MAX_N_BEAM_PARMS]; /* beam parameters */ +/* used in io.c */ +char logname[MAX_FNAME]=""; /* name of logfile */ +/* used in iterative.c */ +double eps; /* relative error to reach */ +/* used in make_particle.c */ +int shape; /* particle shape definition */ +int sh_Npars; /* number of shape parameters */ +double sh_pars[MAX_N_SH_PARMS]; /* storage for shape parameters */ +int sym_type; /* how to treat particle symmetries */ +double sizeX; /* size of particle along x-axis */ +double dpl; /* number of dipoles per lambda (wavelength) */ +double lambda; /* incident wavelength (in vacuum) */ +int jagged; /* size of big dipoles, used to construct a particle */ +char aggregate_file[MAX_FNAME]; /* name of aggregate file */ +char save_geom_fname[MAX_FNAME]; /* geometry file name to save dipole configuration */ +char shapename[MAX_LINE]; /* name of the shape used */ +int volcor; /* whether to use volume correction */ +int save_geom; /* whether to save dipole configuration in .geom file */ +opt_index opt_sh; /* option index of shape option used */ +double gr_vf; /* granules volume fraction */ +double gr_d; /* granules diameter */ +int gr_mat; /* domain number to granulate */ +double a_eq; /* volume-equivalent radius of the particle */ +int sg_format; /* format for saving geometry files */ + +/* LOCAL VARIABLES */ + +static char run_name[MAX_WORD]; /* first part of the dir name ('run' or 'test') */ +static char avg_parms[MAX_FNAME]; /* name of file with orientation averaging parameters */ +static char *exename; /* name of executable (adda or adda.exe) */ +static int Nmat_given; /* number of refractive indices given in the command line */ + /* structure definitions */ struct subopt_struct { - const char *name; // name of option - const char *usage; // how to use (argument list) - const char *help; // help string - const int narg; /* possible number of arguments; UNDEF -> should not be checked; - * may contain also some special negative codes, like FNAME_ARG - */ - const int type; // type of suboption + const char *name; /* name of option */ + const char *usage; /* how to use (argument list) */ + const char *help; /* help string */ + const int narg; /* possible number of argumetns ; UNDEF -> should not be checked */ + const int type; /* type of suboption */ }; struct opt_struct { - const char *name; // name of option - void (*func)(int Narg,char **argv); // pointer to a function, that parse this parameter - int used; // flag to indicate, if the option was already used - const char *usage; // how to use (argument list) - const char *help; // help string - const int narg; // possible number of arguments; UNDEF -> should not be checked - const struct subopt_struct *sub; // suboptions + const char *name; /* name of option */ + void (*func)(int Narg,char **argv); /* pointer to a function, that parse this parameter */ + int used; /* flag to indicate, if the option was allready used */ + const char *usage; /* how to use (argument list) */ + const char *help; /* help string */ + const int narg; /* possible number of argumetns ; UNDEF -> should not be checked */ + const struct subopt_struct *sub; /* suboptions */ }; -// const string for usage of ADDA -static const char exeusage[]="[- [] [- ]...]]"; -/* initializations of suboptions; should be 'NULL terminated' - * each row contains: suboption name, usage string, help string, number of arguments - * (UNDEF = not checked automatically), identifier (number) - */ + /* const string for usage of ADDA */ +static const char exeusage[]="[- [] [- []...]]"; + /* initializations of suboptions; should be 'NULL terminated' + each row contains: suboption name, usage string, help string, number of arguments + (UNDEF = not checked automatically, identifier (number) */ static const struct subopt_struct beam_opt[]={ - {"plane","","Infinite plane wave",0,B_PLANE}, - {"lminus"," [ ]","Simplest approximation of the Gaussian beam. The beam " - "width is obligatory and x, y, z coordinates of the center of the beam are optional " - "parameters (all in um). By default beam center coincides with the center of the " - "computational box.",UNDEF,B_LMINUS}, - {"davis3"," [ ]","3rd order approximation of the Gaussian beam (by Davis). " - "The beam width is obligatory and x, y, z coordinates of the center of the beam are " - "optional parameters (all in um). By default beam center coincides with the center of the " - "computational box.",UNDEF,B_DAVIS3}, - {"barton5"," [ ]","5th order approximation of the Gaussian beam (by Barton). " - "The beam width is obligatory and x, y, z coordinates of the center of the beam are " - "optional parameters (all in um). By default beam center coincides with the center of the " - "computational box. This option is recommended for the description of the Gaussian beam.", - UNDEF,B_BARTON5}, - {NULL,NULL,NULL,0,0} + {"plane","","Infinite plane wave",0,B_PLANE}, + {"lminus"," [ ]","Simplest approximation of the Gaussian beam. The beam width "\ + "is obligatory and x, y, z coordinates of the center of the beam are optional parameters "\ + "(all in um). By default beam center coincides with the center of the computational box.", + UNDEF,B_LMINUS}, + {"davis3"," [ ]","3rd order approximation of the Gaussian beam (by Davis). The "\ + "beam width is obligatory and x, y, z coordinates of the center of the beam are optional "\ + "parameters (all in um). By default beam center coincides with the center of the "\ + "computational box.",UNDEF,B_DAVIS3}, + {"barton5"," [ ]","5th order approximation of the Gaussian beam (by Barton). "\ + "The beam width is obligatory and x, y, z coordinates of the center of the beam are "\ + "optional parameters (all in um). By default beam center coincides with the center of the "\ + "computational box. This option is recommended for the description of the Gaussian beam.", + UNDEF,B_BARTON5}, + {NULL,NULL,NULL,0,0} }; static const struct subopt_struct shape_opt[]={ - {"axisymmetric","","Axisymmetric homogeneous shape, defined by its contour in " - "ro-z plane of the cylindrical coordinate system. Its symmetry axis coincides with the " - "z-axis, and the contour is read from file.",FNAME_ARG,SH_AXISYMMETRIC}, - {"box","[ ]","Homogeneous cube (if no arguments are given) or a rectangular " - "parallelepiped with edges x,y,z.",UNDEF,SH_BOX}, - {"capsule","","Homogeneous capsule (cylinder with half-spherical end caps) with cylinder " - "height h and diameter d (its axis of symmetry coincides with the z-axis).",1,SH_CAPSULE}, - {"coated"," [ ]","Sphere with a spherical inclusion; outer sphere has " - "a diameter d (first domain). The included sphere has a diameter d_in (optional position " - "of the center: x,y,z).",UNDEF,SH_COATED}, - {"cylinder","","Homogeneous cylinder with height (length) h and diameter d (its axis of " - "symmetry coincides with the z-axis).",1,SH_CYLINDER}, - {"egg"," ","Axisymmetric egg shape given by a^2=r^2+nu*r*z-(1-eps)z^2, where 'a' is " - "scaling factor. Parameters must satisfy 0 ","Homogeneous general ellipsoid with semi-axes x,y,z",2,SH_ELLIPSOID}, - {"line","","Line along the x-axis with the width of one dipole",0,SH_LINE}, - {"rbc"," ","Red Blood Cell, an axisymmetric (over z-axis) biconcave " - "homogeneous particle, which is characterized by diameter d, maximum and minimum width h, " - "b, and diameter at the position of the maximum width c. The surface is described by " - "ro^4+2S*ro^2*z^2+z^4+P*ro^2+Q*z^2+R=0, ro^2=x^2+y^2, P,Q,R,S are determined by the " - "described parameters.",3,SH_RBC}, - {"read","","Read a particle geometry from file ",FNAME_ARG,SH_READ}, - {"sphere","","Homogeneous sphere",0,SH_SPHERE}, - {"spherebox","","Sphere (diameter d_sph) in a cube (size Dx, first domain)", - 1,SH_SPHEREBOX}, + {"box","[ ]","Homogenous cube (if no arguments are given) or a rectangular "\ + "parallelepiped with edges x,y,z.",UNDEF,SH_BOX}, + {"capsule","","Homogenous capsule (cylinder with half-spherical end caps) with cylinder "\ + "height h and diameter d (its axis of symmetry coincides with the z-axis).",1,SH_CAPSULE}, + {"coated"," [ ]","Sphere with a spherical inclusion; outer sphere has a "\ + "diameter d (first domain). The included sphere has a diameter d_in (optional position of "\ + "the center: x,y,z).",UNDEF,SH_COATED}, + {"cylinder","","Homogenous cylinder with height (length) h and diameter d (its axis of "\ + "symmetry coincides with the z-axis).",1,SH_CYLINDER}, + {"egg"," ","Axisymmetric egg shape given by a^2=r^2+nu*r*z-(1-eps)z^2, where 'a' "\ + "is scaling factor. Parameters must satisfy 0 ","Homogenous general ellipsoid with semi-axes x,y,z",2,SH_ELLIPSOID}, + {"line","","Line along the x-axis with the width of one dipole",0,SH_LINE}, + {"rbc"," ","Red Blood Cell, an axisymmetric (over z-axis) biconcave homogenous "\ + "particle, which is characterized by diameter d, maximum and minimum width h, b, and "\ + "diameter at the position of the maximum width c. The surface is described by "\ + "ro^4+2S*ro^2*z^2+z^4+P*ro^2+Q*z^2+R=0, ro^2=x^2+y^2, P,Q,R,S are determined by the "\ + "described parameters.",3,SH_RBC}, + {"read","","Read a particle geometry from file ",1,SH_READ}, + {"sphere","","Homogenous sphere",0,SH_SPHERE}, + {"spherebox","","Sphere (diameter d_sph) in a cube (size Dx, first domain)", + 1,SH_SPHEREBOX}, /* TO ADD NEW SHAPE - * add a row here, before null-terminating element. It contains: - * shape name (used in command line), usage string (what command line parameters can be used - * for this shape), help string (shown when -h option is used), possible number of float parameters, - * shape identifier (constant defined in const.h). Instead of number of parameters UNDEF can be - * used (if shape can accept variable number of parameters, then check it explicitly in - * function InitShape) or FNAME_ARG (if the shape accepts a single string argument with file name). - * Number of parameters should not be greater than MAX_N_SH_PARMS (defined in const.h). It is - * recommended to use dimensionless shape parameters, e.g. aspect ratios. - */ - {NULL,NULL,NULL,0,0} + add a row here, before null-terminating element. It contains: + shape name (used in command line), usage string (what command line parameters can be used + for this shape), help string (shown when -h option is used), possible number of parameters + (use UNDEF if shape can accept different number of parameters, then check it explicitly in + function InitShape), shape identifier (constant defined in const.h). Number of parameters + should not be greater than MAX_N_SH_PARMS (defined in const.h). It is recommended to use + dimensionless shape parameters, e.g. aspect ratios. */ + + {NULL,NULL,NULL,0,0} }; -// EXTERNAL FUNCTIONS +/* EXTERNAL FUNCTIONS */ -// GenerateB.c +/* GenerateB.c */ void InitBeam(void); -//======================================================================== -// declarations of parsing functions; definitions are given below. defines are for conciseness +/*========================================================================*/ + /* declarations of parsing functions; definitions are given below. + defines are for conciseness */ #define PARSE_NAME(a) parse_##a #define PARSE_FUNC(a) void PARSE_NAME(a)(int Narg,char **argv) #define PAR(a) #a,PARSE_NAME(a),FALSE @@ -252,7 +244,6 @@ PARSE_FUNC(size); PARSE_FUNC(store_beam); PARSE_FUNC(store_dip_pol); PARSE_FUNC(store_force); -PARSE_FUNC(store_grans); PARSE_FUNC(store_int_field); PARSE_FUNC(store_scat_grid); PARSE_FUNC(sym); @@ -260,1451 +251,1426 @@ PARSE_FUNC(test); PARSE_FUNC(V) ATT_NORETURN; PARSE_FUNC(vec); PARSE_FUNC(yz); -/* initialization of options, their usage and help; - * each row contains: PAR(option name),usage string, help string, number of arguments - * (UNDEF = not checked automatically),pointer to suboption (if exist) - */ + /* initialization of options, their usage and help; + each row contains: PAR(option name),usage string, help string, number of arguments + (UNDEF = not checked automatically),pointer to suboption (if exist) */ static struct opt_struct options[]={ - {PAR(alldir_inp),"","Specifies a file with parameters of the grid of scattering " - "angles for calculating integral scattering quantities.\n" - "Default: " FD_ALLDIR_PARMS,1,NULL}, - {PAR(anisotr),"","Specifies that refractive index is anisotropic (its tensor is limited to be " - "diagonal in particle reference frame). '-m' then accepts 6 arguments per each domain. " - "Can not be used with CLDR polarizability and all SO formulations.",0,NULL}, - {PAR(asym),"","Calculate the asymmetry vector. Implies '-Csca' and '-vec'",0,NULL}, - {PAR(beam)," [...]","Sets a type of the incident beam. Four other float arguments " - "must be specified for all beam types except 'plane'. These are the width and x, y, z " - "coordinates of the center of the beam respectively (all in um).\n" - "Default: plane",UNDEF,beam_opt}, - {PAR(chp_dir),"","Sets directory for the checkpoint (both for saving and loading).\n" - "Default: " FD_CHP_DIR,1,NULL}, - {PAR(chp_load),"","Restart a simulation from a checkpoint",0,NULL}, - {PAR(chp_type),"{normal|regular|always}", - "Sets type of the checkpoint. All types, except 'always', require '-chpoint'.\n" - "Default: normal",1,NULL}, - {PAR(chpoint),"