diff --git a/src/GenerateB.c b/src/GenerateB.c
index 20fa42d9..afccdd26 100644
--- a/src/GenerateB.c
+++ b/src/GenerateB.c
@@ -91,6 +91,7 @@ void InitBeam(void)
 			if (surface) {
 				// Here we set ki,kt,ktVec and propagation directions prIncRefl,prIncTran
 				if (prop_0[2]>0) { // beam comes from the substrate (below)
+					// here msub should always be defined
 					ki=msub*prop_0[2];
 					kt=cSqrtCut(1 - msub*msub*(prop_0[0]*prop_0[0]+prop_0[1]*prop_0[1]));
 					// determine propagation direction and full wavevector of wave transmitted into substrate
@@ -101,17 +102,21 @@ void InitBeam(void)
 				else if (prop_0[2]<0) { // beam comes from above the substrate
 					vRefl(prop_0,prIncRefl);
 					ki=-prop_0[2];
-					kt=cSqrtCut(msub*msub - (prop_0[0]*prop_0[0]+prop_0[1]*prop_0[1]));
-					// determine propagation direction of wave transmitted into substrate
-					ktVec[0]=prop_0[0];
-					ktVec[1]=prop_0[1];
-					ktVec[2]=-kt;
+					if (!msubInf) {
+						kt=cSqrtCut(msub*msub - (prop_0[0]*prop_0[0]+prop_0[1]*prop_0[1]));
+						// determine propagation direction of wave transmitted into substrate
+						ktVec[0]=prop_0[0];
+						ktVec[1]=prop_0[1];
+						ktVec[2]=-kt;
+					}
 				}
 				else LogError(ONE_POS,"Ambiguous setting of beam propagating along the surface. Please specify the"
 					"incident direction to have (arbitrary) small positive or negative z-component");
 				vRefl(prop_0,prIncRefl);
-				vReal(ktVec,prIncTran);
-				vNormalize(prIncTran);
+				if (!msubInf) {
+					vReal(ktVec,prIncTran);
+					vNormalize(prIncTran);
+				}
 			}
 			return;
 		case B_LMINUS:
@@ -229,7 +234,7 @@ void GenerateB (const enum incpol which,   // x - or y polarized incident light
 				 */
 				doublecomplex rc,tc; // reflection and transmission coefficients
 				if (prop[2]>0) { // beam comes from the substrate (below)
-					//  determine amplitude of the reflected and transmitted waves
+					//  determine amplitude of the reflected and transmitted waves; here msub is always defined
 					if (which==INCPOL_Y) { // s-polarized
 						cvBuildRe(ex,eIncRefl);
 						cvBuildRe(ex,eIncTran);
@@ -256,20 +261,26 @@ void GenerateB (const enum incpol which,   // x - or y polarized incident light
 					// determine amplitude of the reflected and transmitted waves
 					if (which==INCPOL_Y) { // s-polarized
 						cvBuildRe(ex,eIncRefl);
-						cvBuildRe(ex,eIncTran);
-						rc=FresnelRS(ki,kt);
-						tc=FresnelTS(ki,kt);
+						if (msubInf) rc=-1;
+						else {
+							cvBuildRe(ex,eIncTran);
+							rc=FresnelRS(ki,kt);
+							tc=FresnelTS(ki,kt);
+						}
 					}
 					else { // p-polarized
 						vInvRefl_cr(ex,eIncRefl);
-						crCrossProd(ey,ktVec,eIncTran);
-						cvMultScal_cmplx(1/msub,eIncTran,eIncTran); // normalize eIncTran by ||ktVec||=msub
-						rc=FresnelRP(ki,kt,msub);
-						tc=FresnelTP(ki,kt,msub);
+						if (msubInf) rc=1;
+						else {
+							crCrossProd(ey,ktVec,eIncTran);
+							cvMultScal_cmplx(1/msub,eIncTran,eIncTran); // normalize eIncTran by ||ktVec||=msub
+							rc=FresnelRP(ki,kt,msub);
+							tc=FresnelTP(ki,kt,msub);
+						}
 					}
 					// phase shift due to the origin at height hsub
 					cvMultScal_cmplx(rc*imExp(2*WaveNum*ki*hsub),eIncRefl,eIncRefl);
-					cvMultScal_cmplx(tc*cexp(I*WaveNum*(ki-kt)*hsub),eIncTran,eIncTran);
+					if (!msubInf) cvMultScal_cmplx(tc*cexp(I*WaveNum*(ki-kt)*hsub),eIncTran,eIncTran);
 					// main part
 					for (i=0;i<local_nvoid_Ndip;i++) {
 						j=3*i;
diff --git a/src/Makefile b/src/Makefile
index fbe9e8b7..7ef17d24 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -171,8 +171,8 @@ override EXTRA_FLAGS +=
 
 # C files are located in source folder (src/), other files may be added below
 CSOURCE := ADDAmain.c CalculateE.c calculator.c chebyshev.c comm.c crosssec.c GenerateB.c interaction.c io.c \
-           iterative.c linalg.c make_particle.c matvec.c memory.c  mt19937ar.c param.c Romberg.c sinint.c timing.c \
-           vars.c
+           iterative.c linalg.c make_particle.c matvec.c memory.c  mt19937ar.c param.c Romberg.c sinint.c somnec.c \
+           timing.c vars.c
 # Fortran files are located in src/fort folder, other files may be added below
 FSOURCE := d07hre.f d09hre.f d113re.f d132re.f dadhre.f dchhre.f dcuhre.f dfshre.f dinhre.f drlhre.f dtrhre.f \
            propaesplibreintadda.f
diff --git a/src/const.h b/src/const.h
index 054b7244..e965fda7 100644
--- a/src/const.h
+++ b/src/const.h
@@ -18,7 +18,7 @@
 #define __const_h
 
 // version number (string)
-#define ADDA_VERSION "1.3a1"
+#define ADDA_VERSION "1.3a2"
 
 /* ADDA uses certain C99 extensions, which are widely supported by GNU and Intel compilers. However, they may be not
  * completely supported by e.g. Microsoft Visual Studio compiler. Therefore, we check the version of the standard here
diff --git a/src/crosssec.c b/src/crosssec.c
index de78d308..834499a3 100644
--- a/src/crosssec.c
+++ b/src/crosssec.c
@@ -633,6 +633,10 @@ static void CalcFieldSurf(doublecomplex ebuff[static restrict 3], // where to wr
 		nN[2]*=-1;
 	}
 	else { // transmission
+		if (msubInf) { // no transmission for perfectly reflecting substrate => zero result
+			cvInit(ebuff);
+			return;
+		}
 		doublecomplex eps=msub*msub;
 		// effective eps and (real part of) refractive index
 		doublecomplex epsEff=(creal(eps) + I*cimag(eps)/nF[2]);
@@ -728,21 +732,28 @@ static void CalcFieldSurf(doublecomplex ebuff[static restrict 3], // where to wr
 	 * quantities, like Csca (if sufficient very number of integration points is chosen)
 	 */
 	ki=-nN[2];
-	if (cabs(msub-1)<ROUND_ERR && fabs(ki)<ROUND_ERR) kt=ki; // special case to avoid randomness due to round-off errors
-	else kt=cSqrtCut(msub*msub - (nN[0]*nN[0]+nN[1]*nN[1]));
-	if (above) {
-		/* here we test only the exact zero, since for other cases (when msub=1, and very small values of ki,kt) the
-		 * above assignment kt=ki guarantees correct results through standard functions
-		 */
-		if (ki==0 && kt==0) cs=cp=0;
-		else {
-			cs=FresnelRS(ki,kt);
-			cp=FresnelRP(ki,kt,msub);
-		}
+	if (msubInf) {
+		cs=-1;
+		cp=1;
+		kt=NAN; // redundant to remove warnings below
 	}
-	else { // below surface
-		cs=FresnelTS(ki,kt);
-		cp=FresnelTP(ki,kt,msub);
+	else {
+		if (cabs(msub-1)<ROUND_ERR && fabs(ki)<ROUND_ERR) kt=ki; // special case to avoid randomness due to round-off errors
+		else kt=cSqrtCut(msub*msub - (nN[0]*nN[0]+nN[1]*nN[1]));
+		if (above) {
+			/* here we test only the exact zero, since for other cases (when msub=1, and very small values of ki,kt) the
+			 * above assignment kt=ki guarantees correct results through standard functions
+			 */
+			if (ki==0 && kt==0) cs=cp=0;
+			else {
+				cs=FresnelRS(ki,kt);
+				cp=FresnelRP(ki,kt,msub);
+			}
+		}
+		else { // below surface
+			cs=FresnelTS(ki,kt);
+			cp=FresnelTP(ki,kt,msub);
+		}
 	}
 		// set unit vectors for s- and p-polarizations; es is the same for all cases
 	if (vAlongZ(nF)) { // special case - es=ey
@@ -756,7 +767,7 @@ static void CalcFieldSurf(doublecomplex ebuff[static restrict 3], // where to wr
 	}
 	CrossProd(es,nN,epN); // epN = es x nN
 		// epF is different for reflected and transmitted (similar to the code in GenerateB.c)
-	if (above || cimag(msub)==0) { // epF = es x nF; also includes simple case with real ktVec
+	if (above || cimag(msub)==0) { // epF = es x nF; also includes simple cases with real ktVec and msubInf
 		double epFRe[3];
 		CrossProd(es,nF,epFRe);
 		cvBuildRe(epFRe,epF);
@@ -1093,7 +1104,7 @@ static double CscaIntegrand(const int theta,const int phi,double * restrict res)
 	 * still unclear (especially for strong absorption). So we use Re[msub]*||E||^2 in all cases, which should be
 	 * exact for real msub, and a good approximation in the case of (weakly) absorbing medium.
 	 */
-	if (surface && cos(Deg2Rad(theta_int.val[theta]))<=-ROUND_ERR) res[0]*=creal(msub);
+	if (surface && !msubInf && cos(Deg2Rad(theta_int.val[theta]))<=-ROUND_ERR) res[0]*=creal(msub);
 	return 0;
 }
 
@@ -1134,7 +1145,7 @@ static double gIntegrand(const int theta,const int phi,double * restrict res)
 	 *
 	 * Moreover, after such modification g = (gCsca)/Csca has very little sense, in particular it is not <cos(th)>
 	 */
-	if (surface && cos(th)<=-ROUND_ERR) E_square*=creal(msub)*creal(msub);
+	if (surface && !msubInf && cos(th)<=-ROUND_ERR) E_square*=creal(msub)*creal(msub);
 	res[0] = E_square*sin(th)*cos(ph);
 	res[1] = E_square*sin(th)*sin(ph);
 	res[2] = E_square*cos(th);
@@ -1173,7 +1184,7 @@ static double gxIntegrand(const int theta,const int phi,double * restrict res)
 	else {
 		res[0]=E2_alldir[AlldirIndex(theta,phi)]*sin(Deg2Rad(th))*cos(Deg2Rad(phi_int.val[phi]));
 		// the following correction is explained in gIntegrand()
-		if (surface && cos(Deg2Rad(th))<=-ROUND_ERR) res[0]*=creal(msub)*creal(msub);
+		if (surface && !msubInf && cos(Deg2Rad(th))<=-ROUND_ERR) res[0]*=creal(msub)*creal(msub);
 	}
 	return 0;
 }
@@ -1208,7 +1219,7 @@ static double gyIntegrand(const int theta,const int phi,double * restrict res)
 	else {
 		res[0]=E2_alldir[AlldirIndex(theta,phi)]*sin(Deg2Rad(th))*sin(Deg2Rad(phi_int.val[phi]));
 		// the following correction is explained in gIntegrand()
-		if (surface && cos(Deg2Rad(th))<=-ROUND_ERR) res[0]*=creal(msub)*creal(msub);
+		if (surface && !msubInf && cos(Deg2Rad(th))<=-ROUND_ERR) res[0]*=creal(msub)*creal(msub);
 	}
 	return 0;
 }
@@ -1237,7 +1248,7 @@ static double gzIntegrand(const int theta,const int phi,double * restrict res)
 	double th=Deg2Rad(theta_int.val[theta]);
 	res[0]=E2_alldir[AlldirIndex(theta,phi)]*cos(th);
 	// the following correction is explained in gIntegrand()
-	if (surface && cos(th)<=-ROUND_ERR) res[0]*=creal(msub)*creal(msub);
+	if (surface && !msubInf  && cos(th)<=-ROUND_ERR) res[0]*=creal(msub)*creal(msub);
 	return 0;
 }
 
diff --git a/src/interaction.c b/src/interaction.c
index 5509e742..61af9258 100644
--- a/src/interaction.c
+++ b/src/interaction.c
@@ -17,6 +17,7 @@
 #include "interaction.h" // corresponding headers
 // project headers
 #include "cmplx.h"
+#include "comm.h"
 #include "io.h"
 #include "memory.h"
 #include "vars.h"
@@ -46,6 +47,9 @@ static int ** restrict tab_index; // matrix for indexing of table arrays
 // KroneckerDelta[mu,nu] - can serve both as multiplier, and as bool
 static const double dmunu[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 1.0};
 static doublecomplex surfRCn; // reflection coefficient for normal incidence
+static bool XlessY; // whether boxX is not larger than boxY (used for SomTable)
+static doublecomplex * restrict somTable; // table of Sommerfeld integrals
+static size_t * restrict somIndex; // array for indexing somTable (in the xy-plane)
 
 #ifdef USE_SSE3
 static __m128d c1, c2, c3, zo, inv_2pi, p360, prad_to_deg;
@@ -61,10 +65,13 @@ void propaespacelibreintadda_(const double *Rij,const double *ka,const double *a
 #endif
 // sinint.c
 void cisi(double x,double *ci,double *si);
+// somnec.c (copied from somnec.h, but we do not include the whole header to avoid conflicts)
+void som_init(complex double epscf);
+void evlua(double zphIn,double rhoIn,complex double *erv, complex double *ezv,complex double *erh, complex double *eph);
 
 // this is used for debugging, should be empty define, when not required
-#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",i,j,k,REIM(result[0]),\
-	REIM(result[1]),REIM(result[2]),REIM(result[3]),REIM(result[4]),REIM(result[5]));*/
+#define PRINT_GVAL /*printf("%s: %d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",__func__,i,j,k,\
+	REIM(result[0]),REIM(result[1]),REIM(result[2]),REIM(result[3]),REIM(result[4]),REIM(result[5]));*/
 
 //=====================================================================================================================
 /* The following two functions are used to do common calculation parts. For simplicity it is best to call them with the
@@ -851,7 +858,7 @@ void CalcReflTerm_img(const int i,const int j,const int k,doublecomplex result[s
 	CalcInterParams2(qvec,qmunu,rn,&invrn,&invr3,&kr,&kr2);
 	// this is also a modification of simple Green's tensor, multiplying by refl. coef. and inverting z-components
 	const double t1=(3-kr2), t2=-3*kr, t3=(kr2-1);
-	expval=surfRCn*invr3*imExp(kr);
+	expval=surfRCn*invr3*accImExp(kr);
 #define INTERACT_DIAG(ind) { result[ind] = ((t1*qmunu[ind]+t3) + I*(kr+t2*qmunu[ind]))*expval; }
 #define INTERACT_NONDIAG(ind) { result[ind] = (t1+I*t2)*qmunu[ind]*expval; }
 	INTERACT_DIAG(0);    // xx
@@ -871,10 +878,127 @@ void CalcReflTerm_img(const int i,const int j,const int k,doublecomplex result[s
 
 //=====================================================================================================================
 
+static void evluaWrapper(const double x,const double y,const double z,const double scale,const double isc,
+	doublecomplex *vals)
+{
+	double rho=sqrt(x*x+y*y)*scale;
+	if (rho==0) rho=z*0.00000001; // a hack to overcome the poor precision of somnec for rho=0;
+	evlua(z,rho,vals,vals+1,vals+2,vals+3);
+	// scale integrals (this can be removed by changes in som_init to use proper wavenumber instead of 2*pi)
+	vals[0]*=isc;
+	vals[1]*=isc;
+	vals[2]*=isc;
+	vals[3]*=isc;
+}
+
+//=====================================================================================================================
+
+static void CalcSomTable(void)
+/* calculates a table of (essential Sommerfeld integrals), which are further combined into reflected Green's tensor
+ * For z values - all local grid; for x- and y-values only positive values are considered and additionally y<=x.
+ *
+ * That is good for FFT code, since all these values are required anyway. A minor improvement can be achieved by
+ * locating different pairs of i,j that lead to the same rho (like 3,4 and 5,0), but the fraction of such matching pairs
+ * is very small (also see below). Another way for improvement is to set a (coarser) 2D grid in plane of z-rho and
+ * perform interpolation on it (as done in Schmehl's thesis). But that will add another free parameter affecting the
+ * final accuracy.
+ *
+ * However, in sparse mode this procedure is inefficient, since incurs (potentially) a lot of unnecessary evaluations
+ * of Sommerfeld integrals. For really sparse aggregates the better way is to buildup a lookup table, using only
+ * actually used pairs of (z,rho), as is done in DDA-SI code. A hash table can be used for that.
+ * TODO: Implement such improvement for the sparse mode
+ */
+{
+	// TOSO: those ranges are correct for SPARSE, but should be modified for the FFT version
+	int i,j,k;
+	const double scale=kd/TWO_PI;
+	const double isc=pow(WaveNum/TWO_PI,3); // this is subject to under/overflow
+	double y,z;
+	size_t ind;
+
+	XlessY=(boxX<=boxY);
+	// create index for plane x,y; if boxX<=boxY the space above the main diagonal is indexed (so x<=y) and vice versa
+	MALLOC_VECTOR(somIndex,sizet,boxY+1,ALL);
+	memory+=(boxY+1)*sizeof(size_t);
+	somIndex[0]=0;
+	for (j=0;j<boxY;j++) somIndex[j+1]=somIndex[j] + (XlessY ? MIN(j+1,boxX) : (boxX-j));
+	// allocate and fill the table
+	// TODO: add the required memory to calculator.c (and the manual)
+	const size_t tmp=4*(2*boxZ-1)*somIndex[boxY];
+	MALLOC_VECTOR(somTable,complex,tmp,ALL);
+	memory+=tmp*sizeof(doublecomplex);
+	if (IFROOT) printf("Calculating table of Sommerfeld integrals\n");
+	ind=0;
+	for (k=0;k<(2*boxZ-1);k++) {
+		z=(k+ZsumShift)*scale;
+		for (j=0;j<boxY;j++) {
+			y=j;
+			if (XlessY) for (i=0;i<=j && i<boxX;i++,ind++) evluaWrapper(i,y,z,scale,isc,somTable+4*ind);
+			else for (i=j;i<boxX;i++,ind++) evluaWrapper(i,y,z,scale,isc,somTable+4*ind);
+		}
+	}
+}
+
+//=====================================================================================================================
+
+static void GetSomIntegral(const int i,const int j,const int k,doublecomplex result[static restrict 6])
+// Get Sommerfeld-integral part of the reflected Green's tensor; result is _incremented_ by the new value
+{
+	double x,y;   // coordinates in units of d
+	double rho;   // cylindrical coordinate
+	double xr,yr; // relative (scaled by ro) transverse coordinates
+	doublecomplex Irv,Izv,Irh,Iph; // values of Sommerfeld integrals
+	size_t ind;   // index for the table
+	int iT,jT;    // transformed indices
+
+	// compute xr,yr
+	x=i;
+	y=j;
+	rho=sqrt(x*x+y*y);
+	if (rho==0) { // particular direction doesn't matter in this case
+		xr=1;
+		yr=0;
+	}
+	else {
+		xr=x/rho;
+		yr=y/rho;
+	}
+	// compute table index
+	iT=abs(i);
+	jT=abs(j);
+	if (XlessY) {
+		if (iT<=jT) ind=somIndex[jT]+iT;
+		else ind=somIndex[iT]+jT; // effectively swap iT and jT
+	}
+	else {
+		if (iT>=jT) ind=somIndex[jT]+iT-jT;
+		else ind=somIndex[iT]+jT-iT; // effectively swap iT and jT
+	}
+	ind=4*(ind+k*somIndex[boxY]);
+	// index tables
+	Irv=somTable[ind];
+	Izv=somTable[ind+1];
+	Irh=somTable[ind+2];
+	Iph=somTable[ind+3];
+	// update result
+	result[0] += xr*xr*Irh - yr*yr*Iph;
+	result[1] += xr*yr*(Irh+Iph);
+	result[2] += xr*Irv;
+	result[3] += yr*yr*Irh - xr*xr*Iph;
+	result[4] += yr*Irv;
+	result[5] += Izv;
+}
+
+//=====================================================================================================================
+
 void CalcReflTerm_som(const int i,const int j,const int k,doublecomplex result[static restrict 6])
 // Reflection term using the Sommerfeld integrals; arguments are described in .h file
 {
+	CalcReflTerm_img(i,j,k,result);
+	GetSomIntegral(i,j,k,result);
+	PRINT_GVAL;
 }
+
 //=====================================================================================================================
 
 void InitInteraction(void)
@@ -900,9 +1024,13 @@ void InitInteraction(void)
 	if (surface) {
 		switch (ReflRelation) {
 			case GR_IMG: CalcReflTerm = &CalcReflTerm_img; break;
-			case GR_SOM: CalcReflTerm = &CalcReflTerm_som; break;
+			case GR_SOM:
+				CalcReflTerm = &CalcReflTerm_som;
+				som_init(msub*msub);
+				CalcSomTable();
+				break;
 		}
-		surfRCn=(1-msub*msub)/(1+msub*msub);
+		surfRCn=msubInf ? -1 : ((1-msub*msub)/(1+msub*msub));
 	}
 
 #ifdef USE_SSE3
@@ -927,4 +1055,8 @@ void FreeInteraction(void)
 // Free buffers used for interaction calculation
 {
 	if (IntRelation == G_SO || IntRelation == G_IGT_SO) FreeTables();
+	if (surface && ReflRelation==GR_SOM) {
+		Free_general(somIndex);
+		Free_cVector(somTable);
+	}
 }
diff --git a/src/memory.c b/src/memory.c
index 5199a795..63eaca82 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -238,6 +238,19 @@ bool *boolVector(const size_t size,OTHER_ARGUMENTS)
 
 //======================================================================================================================
 
+size_t *sizetVector(const size_t size,OTHER_ARGUMENTS)
+// allocates bool vector
+{
+	size_t * restrict v;
+
+	CHECK_SIZE(size,size_t);
+	v=(size_t *)malloc(size*sizeof(size_t));
+	CHECK_NULL(size,v);
+	return v;
+}
+
+//======================================================================================================================
+
 void *voidVector(const size_t size,OTHER_ARGUMENTS)
 // allocates void vector
 {
diff --git a/src/memory.h b/src/memory.h
index 082c6b96..c7809746 100644
--- a/src/memory.h
+++ b/src/memory.h
@@ -43,6 +43,7 @@ unsigned short *ushortVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC;
 char *charVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC;
 unsigned char *ucharVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC;
 bool *boolVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC;
+size_t *sizetVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC;
 void *voidVector(size_t size,OTHER_ARGUMENTS) ATT_MALLOC;
 // reallocate; only two for now, more can be easily added
 double *doubleRealloc(double *ptr,const size_t size,OTHER_ARGUMENTS) ATT_MALLOC;
diff --git a/src/param.c b/src/param.c
index 0105afd7..0ac8df4d 100644
--- a/src/param.c
+++ b/src/param.c
@@ -160,10 +160,14 @@ static const char *avg_parms;   // name of file with orientation averaging param
 static const char *exename;     // name of executable (adda, adda.exe, adda_mpi,...)
 static int Nmat_given;          // number of refractive indices given in the command line
 static enum sym sym_type;       // how to treat particle symmetries
+/* The following '..._used' flags are, in principle, redundant, since the structure 'options' contains the same flags.
+ * However, the latter can't be easily addressed by the option name (a search over the whole options is required).
+ */
 static bool prop_used;          // whether '-prop ...' was used in the command line
 static bool orient_used;        // whether '-orient ...' was used in the command line
 static bool yz_used;            // whether '-yz ...' was used in the command line
 static bool scat_plane_used;    // whether '-scat_plane ...' was used in the command line
+static bool int_surf_used;      // whether '-int_surf ...' was used in the command line
 
 /* TO ADD NEW COMMAND LINE OPTION
  * If you need new variables or flags to implement effect of the new command line option, define them here. If a
@@ -473,7 +477,7 @@ static struct opt_struct options[]={
 	#ifdef SPARSE
 			"!!! In sparse mode 'som' is expected to be very slow.\n"
 	#endif
-			"Default: som",1,NULL},
+			"Default: som (but 'img' if surface is perfectly reflecting)",1,NULL},
 	{PAR(iter),"{bcgs2|bicg|bicgstab|cgnr|csym|qmr|qmr2}","Sets the iterative solver.\n"
 		"Default: qmr",1,NULL},
 		/* TO ADD NEW ITERATIVE SOLVER
@@ -589,10 +593,12 @@ static struct opt_struct options[]={
 #endif
 	{PAR(store_int_field),"","Save internal fields to a file",0,NULL},
 	{PAR(store_scat_grid),"","Calculate Mueller matrix for a grid of scattering angles and save it to a file.",0,NULL},
-	{PAR(surf),"<mre> <mim> <h>","Specifies that scatterer is located above the plane surface, parallel to the "
-		"xy-plane. First two arguments specify the refractive index of the substrate (below the surface), assuming that "
-		"the vacuum is above the surface. <h> specifies the height of particle center above the surface (along the "
-		"z-axis, in um). Particle must be entirely above the substrate.",3,NULL},
+	{PAR(surf),"<h> {<mre> <mim>|inf}","Specifies that scatterer is located above the plane surface, parallel to the "
+		"xy-plane. <h> specifies the height of particle center above the surface (along the z-axis, in um). Particle "
+		"must be entirely above the substrate. Following argument(s) specify the refractive index of the substrate "
+		"(below the surface), assuming that the vacuum is above the surface. It is done either by two values (real and "
+		"imaginary parts of the complex value) or as effectively infinite 'inf' which corresponds to perfectly"
+		"reflective surface. The latter implies certain simplifications during calculations.",UNDEF,NULL},
 	{PAR(sym),"{auto|no|enf}","Automatically determine particle symmetries ('auto'), do not take them into account "
 		"('no'), or enforce them ('enf').\n"
 		"Default: auto",1,NULL},
@@ -1132,6 +1138,7 @@ PARSE_FUNC(int_surf)
 	if (strcmp(argv[1],"img")==0) ReflRelation=GR_IMG;
 	else if (strcmp(argv[1],"som")==0) ReflRelation=GR_SOM;
 	else NotSupported("Interaction term prescription",argv[1]);
+	int_surf_used=true;
 }
 PARSE_FUNC(iter)
 {
@@ -1393,12 +1400,19 @@ PARSE_FUNC(store_scat_grid)
 PARSE_FUNC(surf)
 {
 	double mre,mim;
-
-	ScanDoubleError(argv[1],&mre);
-	ScanDoubleError(argv[2],&mim);
-	msub = mre + I*mim;
-	ScanDoubleError(argv[3],&hsub);
+	if (Narg!=2 && Narg!=3) NargError(Narg,"2 or 3");
+	ScanDoubleError(argv[1],&hsub);
 	TestPositive(hsub,"height above surface");
+	if (Narg==2) {
+		if (strcmp(argv[2],"inf")==0) msubInf=true;
+		else PrintErrorHelp(
+			"Illegal number of arguments (%d) to '-surf' option (total 3 or second argument 'inf' is expected)",Narg);
+	}
+	else { // Narg==3
+		ScanDoubleError(argv[2],&mre);
+		ScanDoubleError(argv[3],&mim);
+		msub = mre + I*mim;
+	}
 	surface = true;
 	symZ=false;
 }
@@ -1767,7 +1781,8 @@ void InitVariables(void)
 	InitField=IF_AUTO;
 	recalc_resid=false;
 	surface=false;
-	ReflRelation=GR_SOM;
+	msubInf=false;
+	int_surf_used=false;
 	// sometimes the following two are left uninitialized
 	beam_fnameX=NULL;
 	infi_fnameX=NULL;
@@ -1840,7 +1855,7 @@ void VariablesInterconnect(void)
 	// set default incident direction, which is +z for all configurations
 	if (!prop_used) {
 		prop_0[0]=prop_0[1]=0;
-		prop_0[2] = 1;
+		prop_0[2]=1;
 	}
 	// parameter interconnections
 	/* very unlikely that calc_Cabs will ever be false, but strictly speaking dCabs should be calculated before Cext,
@@ -1920,8 +1935,13 @@ void VariablesInterconnect(void)
 		if (calc_mat_force) PrintError("Currently calculation of radiation forces is incompatible with '-surf'");
 		if (beamtype!=B_PLANE) PrintError("Currently non-plane incident wave is incompatible with '-surf'");
 		if (InitField==IF_WKB) PrintError("'-init_field wkb' and '-surf' can not be used together");
-		if (prop_0[2]==0) PrintError("Ambiguous setting of beam propagating along the surface. Please specify the"
+		if (prop_0[2]==0) PrintError("Ambiguous setting of beam propagating along the surface. Please specify the "
 			"incident direction to have (arbitrary) small positive or negative z-component");
+		if (msubInf && prop_0[2]>0) PrintError("Perfectly reflecting surface ('-surf ... inf') is incompatible with "
+			"incident direction from below (including the default one)");
+		if (!int_surf_used) ReflRelation = msubInf ? GR_IMG : GR_SOM;
+		else if (msubInf && ReflRelation!=GR_IMG) PrintError("For perfectly reflecting surface interaction is always "
+			"computed through an image dipole. So this case is incompatible with other options to '-int_surf ...'");
 	}
 #ifdef SPARSE
 	if (shape==SH_SPHERE) PrintError("Sparse mode requires shape to be read from file (-shape read ...)");
@@ -2144,8 +2164,11 @@ void PrintInfo(void)
 				}
 			}
 		}
-		if (surface) fprintf(logfile,"Particle is placed near the substrate with refractive index "CFORM",\n"
-				"  height of the particle center: "GFORMDEF"\n",REIM(msub),hsub);
+		if (surface) {
+			if (msubInf) fprintf(logfile,"Particle is placed near the perfectly reflecting substrate\n");
+			else fprintf(logfile,"Particle is placed near the substrate with refractive index "CFORM",\n",REIM(msub));
+			fprintf(logfile,"  height of the particle center: "GFORMDEF"\n",hsub);
+		}
 		fprintf(logfile,"Dipoles/lambda: "GFORMDEF"\n",dpl);
 		if (volcor_used) fprintf(logfile,"\t(Volume correction used)\n");
 		fprintf(logfile,"Required relative residual norm: "GFORMDEF"\n",iter_eps);
@@ -2162,9 +2185,13 @@ void PrintInfo(void)
 		fprintf(logfile,"Incident polarization X(per): "GFORMDEF3V"\n",COMP3V(incPolX_0));
 		if (surface) { // include surface-specific vectors
 			fprintf(logfile,"Reflected propagation vector: "GFORMDEF3V"\n",COMP3V(prIncRefl));
-			fprintf(logfile,"Transmitted propagation vector: "GFORMDEF3V,COMP3V(prIncTran));
-			if (prIncTran[2]==0) fprintf(logfile," (evanescent)\n");
-			else fprintf(logfile,"\n");
+			fprintf(logfile,"Transmitted propagation vector: ");
+			if (msubInf) fprintf (logfile,"none\n");
+			else {
+				fprintf(logfile,GFORMDEF3V,COMP3V(prIncTran));
+				if (prIncTran[2]==0) fprintf(logfile," (evanescent)\n");
+				else fprintf(logfile,"\n");
+			}
 		}
 		fprintf(logfile,"\n");
 		// log particle orientation
diff --git a/src/somnec.c b/src/somnec.c
new file mode 100644
index 00000000..eaba321a
--- /dev/null
+++ b/src/somnec.c
@@ -0,0 +1,1011 @@
+/* last change:  pgm   8 nov 2000    1:04 pm */
+/* program somnec(input,output,tape21) */
+
+/* Original code is NEC2C (in public domain) obtained from http://www.si-list.net/swindex.html,
+ * http://sharon.esrac.ele.tue.nl/users/pe1rxq/nec2c.rxq/nec2c.rxq-0.2.tar.gz
+ * This code is incorporated into ADDA with several changes, which are explicitly indicated by comments. Those include:
+ * - no generation of interpolation grid, only single run
+ * - numerical precision was changed to double
+ * - conjugation (that was in plance to couple with other parts of nec2 code) was removed
+ */
+
+/* TODO: Systematic accuracy study of this code is required. At least 7 digits of precision are desired (for test runs)
+ * However, the urgent thing is that for some reason the accuracy of this code is very bad (few percent errors for
+ * rho=0, exactly).
+ */
+
+// ADDA: the cycling to generate interpolation grid was removed, now it is a single-run routine
+/* program to generate nec interpolation grids for fields due to */
+/* ground.  field components are computed by numerical evaluation */
+/* of modified sommerfeld integrals. */
+// ADDA: double precision is now used throughout
+/* somnec2d is a double precision version of somnec for use with */
+/* nec2d.  an alternate version (somnec2sd) is also provided in which */
+/* computation is in single precision but the output file is written */
+/* in double precision for use with nec2d.  somnec2sd runs about twic */
+/* as fast as the full double precision somnec2d.  the difference */
+/* between nec2d results using a for021 file from this code rather */
+/* than from somnec2sd was insignficant in the cases tested. */
+
+/* changes made by j bergervoet, 31-5-95: */
+/* parameter 0. --> 0.d0 in calling of routine test */
+/* status of output files set to 'unknown' */
+
+// ADDA: The following definitions are extracted from the file nec2c.h (only those that are used here)
+#include <complex.h>
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+
+#ifndef	TRUE
+#define	TRUE	1
+#endif
+#ifndef	FALSE
+#define	FALSE	0
+#endif
+
+/* commonly used complex constants */
+#define	CPLX_00	(0.0+0.0fj)
+#define	CPLX_01	(0.0+1.0fj)
+#define	CPLX_10	(1.0+0.0fj)
+#define	CPLX_11	(1.0+1.0fj)
+
+/* common constants */
+#define	PI	3.141592654
+#define	TP	6.283185308
+#define	PTP	.6283185308
+#define PI10	31.41592654
+#define	GAMMA	.5772156649
+#define C1	-.02457850915
+#define C2	.3674669052
+#define C3	.7978845608
+#define P10	.0703125
+#define P20	.1121520996
+#define Q10	.125
+#define Q20	.0732421875
+#define P11	.1171875
+#define P21	.1441955566
+#define Q11	.375
+#define Q21	.1025390625
+#define POF	.7853981635
+/* MAXH=100 seems sufficient to obtain convergence in all cases; larger values doesn't seem to improve the accuracy
+ * even when smaller CRIT is used. Probably other approximations are employed somewhere.
+ */
+#define MAXH	100
+#define CRIT	1.0E-4
+#define NM	131072
+#define NTS	4
+
+#define cmplx(r, i) ((r)+(i)*CPLX_01)
+
+void 	som_init(complex double epscf);
+static void 	bessel(complex double z, complex double *j0, complex double *j0p);
+void 	evlua(double zphIn,double rhoIn,complex double *erv, complex double *ezv,
+	complex double *erh, complex double *eph);
+static void 	gshank(complex double start, complex double dela, complex double *sum,
+	int nans, complex double *seed, int ibk, complex double bk, complex double delb);
+static void 	hankel(complex double z, complex double *h0, complex double *h0p);
+static void 	lambda(double t, complex double *xlam, complex double *dxlam);
+static void 	rom1(int n, complex double *sum, int nx);
+static void 	saoa( double t, complex double *ans);
+
+static void 	test(double f1r, double f2r, double *tr, double f1i,
+	double f2i, double *ti, double dmin);
+static void 	abort_on_error(int why);
+
+/* common /evlcom/ */
+static int jh;
+static double ck2, ck2sq, tkmag, tsmag, ck1r, zph, rho;
+static complex double ct1, ct2, ct3, ck1, ck1sq, cksm;
+
+/* common /cntour/ */
+static complex double a, b;
+
+/*-----------------------------------------------------------------------*/
+
+void som_init(complex double epscf)
+{
+  complex double erv, ezv;
+
+  ck2=TP;
+  ck2sq=ck2*ck2;
+
+  /* sommerfeld integral evaluation uses exp(-jwt) */
+  // ADDA: removed conjugation of epscf and in evaluation of integrals below
+
+  ck1sq=ck2sq*epscf;
+  ck1=csqrt(ck1sq);
+  ck1r=creal(ck1);
+  tkmag=100.*cabs(ck1);
+  tsmag=100.*ck1*conj(ck1);
+  cksm=ck2sq/(ck1sq+ck2sq);
+  ct1=.5*(ck1sq-ck2sq);
+  erv=ck1sq*ck1sq;
+  ezv=ck2sq*ck2sq;
+  ct2=.125*(erv-ezv);
+  erv *= ck1sq;
+  ezv *= ck2sq;
+  ct3=.0625*(erv-ezv);
+
+  return;
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* bessel evaluates the zero-order bessel function */
+/* and its derivative for complex argument z. */
+static void bessel(complex double z, complex double *j0, complex double *j0p )
+{
+  int k, i, ib, iz, miz;
+  static int m[101], init = FALSE;
+  static double a1[25], a2[25];
+  double tst, zms;
+  complex double p0z, p1z, q0z, q1z, zi, zi2, zk, cz, sz, j0x, j0px;
+
+  /* initialization of constants */
+  if( ! init )
+  {
+    for( k = 1; k <= 25; k++ )
+    {
+      i = k-1;
+      a1[i]=-.25/(k*k);
+      a2[i]=1.0/(k+1.0);
+    }
+
+    for( i = 1; i <= 101; i++ )
+    {
+      tst=1.0;
+      for( k = 0; k < 24; k++ )
+      {
+	init = k;
+	tst *= -i*a1[k];
+	if( tst < 1.0e-6 )
+	  break;
+      }
+
+      m[i-1] = init+1;
+    } /* for( i = 1; i<= 101; i++ ) */
+
+    init = TRUE;
+  } /* if(init == 0) */
+
+  zms=z*conj(z);
+  if(zms <= 1.e-12)
+  {
+    *j0=CPLX_10;
+    *j0p=-.5*z;
+    return;
+  }
+
+  ib=0;
+  if(zms <= 37.21)
+  {
+    if(zms > 36.)
+      ib=1;
+
+    /* series expansion */
+    iz=zms;
+    miz=m[iz];
+    *j0=CPLX_10;
+    *j0p=*j0;
+    zk=*j0;
+    zi=z*z;
+
+    for( k = 0; k < miz; k++ )
+    {
+      zk *= a1[k]*zi;
+      *j0 += zk;
+      *j0p += a2[k]*zk;
+    }
+    *j0p *= -.5*z;
+
+    if(ib == 0)
+      return;
+
+    j0x=*j0;
+    j0px=*j0p;
+  }
+
+  /* asymptotic expansion */
+  zi=1./z;
+  zi2=zi*zi;
+  p0z=1.+(P20*zi2-P10)*zi2;
+  p1z=1.+(P11-P21*zi2)*zi2;
+  q0z=(Q20*zi2-Q10)*zi;
+  q1z=(Q11-Q21*zi2)*zi;
+  zk=cexp(CPLX_01*(z-POF));
+  zi2=1./zk;
+  cz=.5*(zk+zi2);
+  sz=CPLX_01*.5*(zi2-zk);
+  zk=C3*csqrt(zi);
+  *j0=zk*(p0z*cz-q0z*sz);
+  *j0p=-zk*(p1z*sz+q1z*cz);
+
+  if(ib == 0)
+    return;
+
+  zms=cos((sqrt(zms)-6.)*PI10);
+  *j0=.5*(j0x*(1.+zms)+ *j0*(1.-zms));
+  *j0p=.5*(j0px*(1.+zms)+ *j0p*(1.-zms));
+
+  return;
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* evlua controls the integration contour in the complex */
+/* lambda plane for evaluation of the sommerfeld integrals */
+void evlua(double zphIn,double rhoIn, complex double *erv, complex double *ezv,
+    complex double *erh, complex double *eph )
+{
+  int i, jump;
+  static double del, slope, rmis;
+  static complex double cp1, cp2, cp3, bk, delta, delta2, sum[6], ans[6];
+
+  zph=zphIn;
+  rho=rhoIn;
+
+  del=zph;
+  if( rho > del )
+    del=rho;
+
+  if(zph >= 2.*rho)
+  {
+    /* bessel function form of sommerfeld integrals */
+    jh=0;
+    a=CPLX_00;
+    del=1./del;
+
+    if( del > tkmag)
+    {
+      b=cmplx(.1*tkmag,-.1*tkmag);
+      rom1(6,sum,2);
+      a=b;
+      b=cmplx(del,-del);
+      rom1 (6,ans,2);
+      for( i = 0; i < 6; i++ )
+	sum[i] += ans[i];
+    }
+    else
+    {
+      b=cmplx(del,-del);
+      rom1(6,sum,2);
+    }
+
+    delta=PTP*del;
+    gshank(b,delta,ans,6,sum,0,b,b);
+    ans[5] *= ck1;
+
+    /* ADDA: conjugate was removed */
+    *erv=ck1sq*ans[2];
+    *ezv=ck1sq*(ans[1]+ck2sq*ans[4]);
+    *erh=ck2sq*(ans[0]+ans[5]);
+    *eph=-ck2sq*(ans[3]+ans[5]);
+
+    return;
+
+  } /* if(zph >= 2.*rho) */
+
+  /* hankel function form of sommerfeld integrals */
+  jh=1;
+  cp1=cmplx(0.0,.4*ck2);
+  cp2=cmplx(.6*ck2,-.2*ck2);
+  cp3=cmplx(1.02*ck2,-.2*ck2);
+  a=cp1;
+  b=cp2;
+  rom1(6,sum,2);
+  a=cp2;
+  b=cp3;
+  rom1(6,ans,2);
+
+  for( i = 0; i < 6; i++ )
+    sum[i]=-(sum[i]+ans[i]);
+
+  /* path from imaginary axis to -infinity */
+  if(zph > .001*rho)
+    slope=rho/zph;
+  else
+    slope=1000.;
+
+  del=PTP/del;
+  delta=cmplx(-1.0,slope)*del/sqrt(1.+slope*slope);
+  delta2=-conj(delta);
+  gshank(cp1,delta,ans,6,sum,0,bk,bk);
+  rmis=rho*(creal(ck1)-ck2);
+
+  jump = FALSE;
+  if( (rmis >= 2.*ck2) && (rho >= 1.e-10) )
+  {
+    if(zph >= 1.e-10)
+    {
+      bk=cmplx(-zph,rho)*(ck1-cp3);
+      rmis=-creal(bk)/fabs(cimag(bk));
+      if(rmis > 4.*rho/zph)
+	jump = TRUE;
+    }
+
+    if( ! jump )
+    {
+      /* integrate up between branch cuts, then to + infinity */
+      cp1=ck1-(.1+.2fj);
+      cp2=cp1+.2;
+      bk=cmplx(0.,del);
+      gshank(cp1,bk,sum,6,ans,0,bk,bk);
+      a=cp1;
+      b=cp2;
+      rom1(6,ans,1);
+      for( i = 0; i < 6; i++ )
+	ans[i] -= sum[i];
+
+      gshank(cp3,bk,sum,6,ans,0,bk,bk);
+      gshank(cp2,delta2,ans,6,sum,0,bk,bk);
+    }
+
+    jump = TRUE;
+
+  } /* if( (rmis >= 2.*ck2) || (rho >= 1.e-10) ) */
+  else
+    jump = FALSE;
+
+  if( ! jump )
+  {
+    /* integrate below branch points, then to + infinity */
+    for( i = 0; i < 6; i++ )
+      sum[i]=-ans[i];
+
+    rmis=creal(ck1)*1.01;
+    if( (ck2+1.) > rmis )
+      rmis=ck2+1.;
+
+    bk=cmplx(rmis,.99*cimag(ck1));
+    delta=bk-cp3;
+    delta *= del/cabs(delta);
+    gshank(cp3,delta,ans,6,sum,1,bk,delta2);
+
+  } /* if( ! jump ) */
+
+  ans[5] *= ck1;
+
+  /* ADDA: conjugate was removed */
+  *erv=ck1sq*ans[2];
+  *ezv=ck1sq*(ans[1]+ck2sq*ans[4]);
+  *erh=ck2sq*(ans[0]+ans[5]);
+  *eph=-ck2sq*(ans[3]+ans[5]);
+
+  return;
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* gshank integrates the 6 sommerfeld integrals from start to */
+/* infinity (until convergence) in lambda.  at the break point, bk, */
+/* the step increment may be changed from dela to delb.  shank's */
+/* algorithm to accelerate convergence of a slowly converging series */
+/* is used */
+static void gshank( complex double start, complex double dela, complex double *sum,
+    int nans, complex double *seed, int ibk, complex double bk, complex double delb )
+{
+  int ibx, j, i, jm, intx, inx, brk=0;
+  static double rbk, amg, den, denm;
+  static complex double a1, a2, as1, as2, del, aa;
+  static complex double q1[6][MAXH], q2[6][MAXH], ans1[6], ans2[6];
+
+  rbk=creal(bk);
+  del=dela;
+  if(ibk == 0)
+    ibx=1;
+  else
+    ibx=0;
+
+  for( i = 0; i < nans; i++ )
+    ans2[i]=seed[i];
+
+  b=start;
+  for( intx = 1; intx <= MAXH; intx++ )
+  {
+    inx=intx-1;
+    a=b;
+    b += del;
+
+    if( (ibx == 0) && (creal(b) >= rbk) )
+    {
+      /* hit break point.  reset seed and start over. */
+      ibx=1;
+      b=bk;
+      del=delb;
+      rom1(nans,sum,2);
+      if( ibx != 2 )
+      {
+	for( i = 0; i < nans; i++ )
+	  ans2[i] += sum[i];
+	intx = 0;
+	continue;
+      }
+
+      for( i = 0; i < nans; i++ )
+	ans2[i]=ans1[i]+sum[i];
+      intx = 0;
+      continue;
+
+    } /* if( (ibx == 0) && (creal(b) >= rbk) ) */
+
+    rom1(nans,sum,2);
+    for( i = 0; i < nans; i++ )
+      ans1[i] = ans2[i]+sum[i];
+    a=b;
+    b += del;
+
+    if( (ibx == 0) && (creal(b) >= rbk) )
+    {
+      /* hit break point.  reset seed and start over. */
+      ibx=2;
+      b=bk;
+      del=delb;
+      rom1(nans,sum,2);
+      if( ibx != 2 )
+      {
+	for( i = 0; i < nans; i++ )
+	  ans2[i] += sum[i];
+	intx = 0;
+	continue;
+      }
+
+      for( i = 0; i < nans; i++ )
+	ans2[i] = ans1[i]+sum[i];
+      intx = 0;
+      continue;
+
+    } /* if( (ibx == 0) && (creal(b) >= rbk) ) */
+
+    rom1(nans,sum,2);
+    for( i = 0; i < nans; i++ )
+      ans2[i]=ans1[i]+sum[i];
+
+    den=0.;
+    for( i = 0; i < nans; i++ )
+    {
+      as1=ans1[i];
+      as2=ans2[i];
+
+      if(intx >= 2)
+      {
+	for( j = 1; j < intx; j++ )
+	{
+	  jm=j-1;
+	  aa=q2[i][jm];
+	  a1=q1[i][jm]+as1-2.*aa;
+
+	  if( (creal(a1) != 0.) || (cimag(a1) != 0.) )
+	  {
+	    a2=aa-q1[i][jm];
+	    a1=q1[i][jm]-a2*a2/a1;
+	  }
+	  else
+	    a1=q1[i][jm];
+
+	  a2=aa+as2-2.*as1;
+	  if( (creal(a2) != 0.) || (cimag(a2) != 0.) )
+	    a2=aa-(as1-aa)*(as1-aa)/a2;
+	  else
+	    a2=aa;
+
+	  q1[i][jm]=as1;
+	  q2[i][jm]=as2;
+	  as1=a1;
+	  as2=a2;
+
+	} /* for( j = 1; i < intx; i++ ) */
+
+      } /* if(intx >= 2) */
+
+      q1[i][intx-1]=as1;
+      q2[i][intx-1]=as2;
+      amg=fabs(creal(as2))+fabs(cimag(as2));
+      if(amg > den)
+	den=amg;
+
+    } /* for( i = 0; i < nans; i++ ) */
+
+    denm=1.e-3*den*CRIT;
+    jm=intx-3;
+    if(jm < 1)
+      jm=1;
+
+    for( j = jm-1; j < intx; j++ )
+    {
+      brk = FALSE;
+      for( i = 0; i < nans; i++ )
+      {
+	a1=q2[i][j];
+	den=(fabs(creal(a1))+fabs(cimag(a1)))*CRIT;
+	if(den < denm)
+	  den=denm;
+	a1=q1[i][j]-a1;
+	amg=fabs(creal(a1)+fabs(cimag(a1)));
+	if(amg > den)
+	{
+	  brk = TRUE;
+	  break;
+	}
+
+      } /* for( i = 0; i < nans; i++ ) */
+
+      if( brk ) break;
+
+    } /* for( j = jm-1; j < intx; j++ ) */
+
+    if( ! brk )
+    {
+      for( i = 0; i < nans; i++ )
+	sum[i]=.5*(q1[i][inx]+q2[i][inx]);
+      return;
+    }
+
+  } /* for( intx = 1; intx <= maxh; intx++ ) */
+
+  /* No convergence */
+  printf("z=%g, rho=%g\n",zph,rho);
+  abort_on_error(-6);
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* hankel evaluates hankel function of the first kind,   */
+/* order zero, and its derivative for complex argument z */
+static void hankel( complex double z, complex double *h0, complex double *h0p )
+{
+  int i, k, ib, iz, miz;
+  static int m[101], init = FALSE;
+  static double a1[25], a2[25], a3[25], a4[25], psi, tst, zms;
+  complex double clogz, j0, j0p, p0z, p1z, q0z, q1z, y0, y0p, zi, zi2, zk;
+
+  /* initialization of constants */
+  if( ! init )
+  {
+    psi=-GAMMA;
+    for( k = 1; k <= 25; k++ )
+    {
+      i = k-1;
+      a1[i]=-.25/(k*k);
+      a2[i]=1.0/(k+1.0);
+      psi += 1.0/k;
+      a3[i]=psi+psi;
+      a4[i]=(psi+psi+1.0/(k+1.0))/(k+1.0);
+    }
+
+    for( i = 1; i <= 101; i++ )
+    {
+      tst=1.0;
+      for( k = 0; k < 24; k++ )
+      {
+	init = k;
+	tst *= -i*a1[k];
+	if(tst*a3[k] < 1.e-6)
+	  break;
+      }
+      m[i-1]=init+1;
+    }
+
+    init = TRUE;
+
+  } /* if( ! init ) */
+
+  zms=z*conj(z);
+  if(zms == 0.)
+    abort_on_error(-7);
+
+  ib=0;
+  if(zms <= 16.81)
+  {
+    if(zms > 16.)
+      ib=1;
+
+    /* series expansion */
+    iz=zms;
+    miz=m[iz];
+    j0=CPLX_10;
+    j0p=j0;
+    y0=CPLX_00;
+    y0p=y0;
+    zk=j0;
+    zi=z*z;
+
+    for( k = 0; k < miz; k++ )
+    {
+      zk *= a1[k]*zi;
+      j0 += zk;
+      j0p += a2[k]*zk;
+      y0 += a3[k]*zk;
+      y0p += a4[k]*zk;
+    }
+
+    j0p *= -.5*z;
+    clogz=clog(.5*z);
+    y0=(2.*j0*clogz-y0)/PI+C2;
+    y0p=(2./z+2.*j0p*clogz+.5*y0p*z)/PI+C1*z;
+    *h0=j0+CPLX_01*y0;
+    *h0p=j0p+CPLX_01*y0p;
+
+    if(ib == 0)
+      return;
+
+    y0=*h0;
+    y0p=*h0p;
+
+  } /* if(zms <= 16.81) */
+
+  /* asymptotic expansion */
+  zi=1./z;
+  zi2=zi*zi;
+  p0z=1.+(P20*zi2-P10)*zi2;
+  p1z=1.+(P11-P21*zi2)*zi2;
+  q0z=(Q20*zi2-Q10)*zi;
+  q1z=(Q11-Q21*zi2)*zi;
+  zk=cexp(CPLX_01*(z-POF))*csqrt(zi)*C3;
+  *h0=zk*(p0z+CPLX_01*q0z);
+  *h0p=CPLX_01*zk*(p1z+CPLX_01*q1z);
+
+  if(ib == 0)
+    return;
+
+  zms=cos((sqrt(zms)-4.)*31.41592654);
+  *h0=.5*(y0*(1.+zms)+ *h0*(1.-zms));
+  *h0p=.5*(y0p*(1.+zms)+ *h0p*(1.-zms));
+
+  return;
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* compute integration parameter xlam=lambda from parameter t. */
+static void lambda( double t, complex double *xlam, complex double *dxlam )
+{
+  *dxlam=b-a;
+  *xlam=a+*dxlam*t;
+  return;
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* rom1 integrates the 6 sommerfeld integrals from a to b in lambda. */
+/* the method of variable interval width romberg integration is used. */
+static void rom1( int n, complex double *sum, int nx )
+{
+  int jump, lstep, nogo, i, ns, nt;
+  static double z, ze, s, ep, zend, dz=0., dzot=0., tr, ti;
+  static complex double t00, t11, t02;
+  static complex double g1[6], g2[6], g3[6], g4[6], g5[6], t01[6], t10[6], t20[6];
+
+  lstep=0;
+  z=0.;
+  ze=1.;
+  s=1.;
+  ep=s/(1.e4*NM);
+  zend=ze-ep;
+  for( i = 0; i < n; i++ )
+    sum[i]=CPLX_00;
+  ns=nx;
+  nt=0;
+  saoa(z,g1);
+
+  jump = FALSE;
+  while( TRUE )
+  {
+    if( ! jump )
+    {
+      dz=s/ns;
+      if( (z+dz) > ze )
+      {
+	dz=ze-z;
+	if( dz <= ep )
+	  return;
+      }
+
+      dzot=dz*.5;
+      saoa(z+dzot,g3);
+      saoa(z+dz,g5);
+
+    } /* if( ! jump ) */
+
+    nogo=FALSE;
+    for( i = 0; i < n; i++ )
+    {
+      t00=(g1[i]+g5[i])*dzot;
+      t01[i]=(t00+dz*g3[i])*.5;
+      t10[i]=(4.*t01[i]-t00)/3.;
+
+      /* test convergence of 3 point romberg result */
+      test( creal(t01[i]), creal(t10[i]), &tr, cimag(t01[i]), cimag(t10[i]), &ti, 0. );
+      if( (tr > CRIT) || (ti > CRIT) )
+	nogo = TRUE;
+    }
+
+    if( ! nogo )
+    {
+      for( i = 0; i < n; i++ )
+	sum[i] += t10[i];
+
+      nt += 2;
+      z += dz;
+      if(z > zend)
+	return;
+
+      for( i = 0; i < n; i++ )
+	g1[i]=g5[i];
+
+      if( (nt >= NTS) && (ns > nx) )
+      {
+	ns=ns/2;
+	nt=1;
+      }
+
+      jump = FALSE;
+      continue;
+
+    } /* if( ! nogo ) */
+
+    saoa(z+dz*.25,g2);
+    saoa(z+dz*.75,g4);
+    nogo=FALSE;
+    for( i = 0; i < n; i++ )
+    {
+      t02=(t01[i]+dzot*(g2[i]+g4[i]))*.5;
+      t11=(4.*t02-t01[i])/3.;
+      t20[i]=(16.*t11-t10[i])/15.;
+
+      /* test convergence of 5 point romberg result */
+      test( creal(t11), creal(t20[i]), &tr, cimag(t11), cimag(t20[i]), &ti, 0. );
+      if( (tr > CRIT) || (ti > CRIT) )
+	nogo = TRUE;
+    }
+
+    if( ! nogo )
+    {
+      for( i = 0; i < n; i++ )
+	sum[i] += t20[i];
+
+      nt++;
+      z += dz;
+      if(z > zend)
+	return;
+
+      for( i = 0; i < n; i++ )
+	g1[i]=g5[i];
+
+      if( (nt >= NTS) && (ns > nx) )
+      {
+	ns=ns/2;
+	nt=1;
+      }
+
+      jump = FALSE;
+      continue;
+
+    } /* if( ! nogo ) */
+
+    nt=0;
+    if(ns < NM)
+    {
+      ns *= 2;
+      dz=s/ns;
+      dzot=dz*.5;
+
+      for( i = 0; i < n; i++ )
+      {
+	g5[i]=g3[i];
+	g3[i]=g2[i];
+      }
+
+      jump = TRUE;
+      continue;
+
+    } /* if(ns < nm) */
+
+    if( ! lstep )
+    {
+      lstep = TRUE;
+      lambda( z, &t00, &t11 );
+    }
+
+    for( i = 0; i < n; i++ )
+      sum[i] += t20[i];
+
+    nt++;
+    z += dz;
+    if(z > zend)
+      return;
+
+    for( i = 0; i < n; i++ )
+      g1[i]=g5[i];
+
+    if( (nt >= NTS) && (ns > nx) )
+    {
+      ns /= 2;
+      nt=1;
+    }
+
+    jump = FALSE;
+
+  } /* while( TRUE ) */
+
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* saoa computes the integrand for each of the 6 sommerfeld */
+/* integrals for source and observer above ground */
+static void saoa( double t, complex double *ans)
+{
+  double xlr, sign;
+  static complex double xl, dxl, cgam1, cgam2, b0, b0p, com, dgam, den1, den2;
+
+  lambda(t, &xl, &dxl);
+  if( jh == 0 )
+  {
+    /* bessel function form */
+    bessel(xl*rho, &b0, &b0p);
+    b0  *=2.;
+    b0p *=2.;
+    cgam1=csqrt(xl*xl-ck1sq);
+    cgam2=csqrt(xl*xl-ck2sq);
+    if(creal(cgam1) == 0.)
+      cgam1=cmplx(0.,-fabs(cimag(cgam1)));
+    if(creal(cgam2) == 0.)
+      cgam2=cmplx(0.,-fabs(cimag(cgam2)));
+  }
+  else
+  {
+    /* hankel function form */
+    hankel(xl*rho, &b0, &b0p);
+    com=xl-ck1;
+    cgam1=csqrt(xl+ck1)*csqrt(com);
+    if(creal(com) < 0. && cimag(com) >= 0.)
+      cgam1=-cgam1;
+    com=xl-ck2;
+    cgam2=csqrt(xl+ck2)*csqrt(com);
+    if(creal(com) < 0. && cimag(com) >= 0.)
+      cgam2=-cgam2;
+  }
+
+  xlr=xl*conj(xl);
+  if(xlr >= tsmag)
+  {
+    if(cimag(xl) >= 0.)
+    {
+      xlr=creal(xl);
+      if(xlr >= ck2)
+      {
+	if(xlr <= ck1r)
+	  dgam=cgam2-cgam1;
+	else
+	{
+	  sign=1.;
+	  dgam=1./(xl*xl);
+	  dgam=sign*((ct3*dgam+ct2)*dgam+ct1)/xl;
+	}
+      }
+      else
+      {
+	sign=-1.;
+	dgam=1./(xl*xl);
+	dgam=sign*((ct3*dgam+ct2)*dgam+ct1)/xl;
+      } /* if(xlr >= ck2) */
+
+    } /* if(cimag(xl) >= 0.) */
+    else
+    {
+      sign=1.;
+      dgam=1./(xl*xl);
+      dgam=sign*((ct3*dgam+ct2)*dgam+ct1)/xl;
+    }
+
+  } /* if(xlr < tsmag) */
+  else
+    dgam=cgam2-cgam1;
+
+  den2=cksm*dgam/(cgam2*(ck1sq*cgam2+ck2sq*cgam1));
+  den1=1./(cgam1+cgam2)-cksm/cgam2;
+  com=dxl*xl*cexp(-cgam2*zph);
+  ans[5]=com*b0*den1/ck1;
+  com *= den2;
+
+  if(rho != 0.)
+  {
+    b0p=b0p/rho;
+    ans[0]=-com*xl*(b0p+b0*xl);
+    ans[3]=com*xl*b0p;
+  }
+  else
+  {
+    ans[0]=-com*xl*xl*.5;
+    ans[3]=ans[0];
+  }
+
+  ans[1]=com*cgam2*cgam2*b0;
+  ans[2]=-ans[3]*cgam2*rho;
+  ans[4]=com*b0;
+
+  return;
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* test for convergence in numerical integration */
+// ADDA: copied from nec2c.c
+static void test( double f1r, double f2r, double *tr,
+    double f1i, double f2i, double *ti, double dmin )
+{
+  double den;
+
+  den= fabs( f2r);
+  *tr= fabs( f2i);
+
+  if( den < *tr)
+    den= *tr;
+  if( den < dmin)
+    den= dmin;
+
+  if( den < 1.0e-37)
+  {
+    *tr=0.;
+    *ti=0.;
+    return;
+  }
+
+  *tr= fabs(( f1r- f2r)/ den);
+  *ti= fabs(( f1i- f2i)/ den);
+
+  return;
+}
+
+/*------------------------------------------------------------------------*/
+
+/*  abort_on_error()
+ *
+ *  prints an error message and exits
+ */
+
+static void abort_on_error( int why )
+{
+  switch( why )
+  {
+    case -1 : /* abort if input file name too long */
+      fprintf( stderr, "%s\n",
+	  "nec2c: input file name too long - aborting" );
+      break;
+
+    case -2 : /* abort if output file name too long */
+      fprintf( stderr, "%s\n",
+	  "nec2c: output file name too long - aborting" );
+      break;
+
+    case -3 : /* abort on input file read error */
+      fprintf( stderr, "%s\n",
+	  "nec2c: error reading input file - aborting" );
+      break;
+
+    case -4 : /* Abort on malloc failure */
+      fprintf( stderr, "%s\n",
+	  "nec2c: A memory allocation request has failed - aborting" );
+      break;
+
+    case -5 : /* Abort if a GF card is read */
+      fprintf( stderr, "%s\n",
+	  "nec2c: NGF solution option not supported - aborting" );
+      break;
+
+    case -6: /* No convergence in gshank() */
+            fprintf( stderr, "%s\n",
+	  "nec2c: No convergence in gshank() - aborting" );
+      break;
+
+    case -7: /* Error in hankel() */
+            fprintf( stderr, "%s\n",
+	  "nec2c: hankel not valid for z=0. - aborting" );
+      break;
+  }  /* switch( why ) */
+
+  /* clean up and quit */
+  exit( why );
+
+} /* end of abort_on_error() */
diff --git a/src/sparse_ops.h b/src/sparse_ops.h
index f42aef53..0f2eed6f 100644
--- a/src/sparse_ops.h
+++ b/src/sparse_ops.h
@@ -49,9 +49,6 @@ static inline void AijProd(doublecomplex * restrict argvec,doublecomplex * restr
 	doublecomplex iterm[6];
 	const size_t i3 = 3*i, j3 = 3*j;
 
-	(*CalcInterTerm)(position[i3]-position_full[j3], position[i3+1]-position_full[j3+1],
-		position[i3+2]-position_full[j3+2], iterm);
-
 	__m128d res, tmp;
 
 	IGNORE_WARNING(-Wstrict-aliasing); // cast from doublecomplex* to double* is perfectly valid in C99
@@ -61,6 +58,8 @@ static inline void AijProd(doublecomplex * restrict argvec,doublecomplex * restr
 	STOP_IGNORE;
 
 	if (j!=local_nvoid_d0+i) { // main interaction is not computed for coinciding dipoles
+		(*CalcInterTerm)(position[i3]-position_full[j3], position[i3+1]-position_full[j3+1],
+			position[i3+2]-position_full[j3+2], iterm);
 		res = cmul(argX, *(__m128d *)&(iterm[0]));
 		tmp = cmul(argY, *(__m128d *)&(iterm[1]));
 		res = cadd(tmp,res);
diff --git a/src/vars.c b/src/vars.c
index 6b5c206f..7c85c9db 100644
--- a/src/vars.c
+++ b/src/vars.c
@@ -125,6 +125,7 @@ TIME_TYPE Timing_EField,      // time for calculating scattered fields
 bool surface;           // whether nearby surface is present
 enum refl ReflRelation; // method to calculate reflected Green's tensor
 doublecomplex msub;     // complex refractive index of the substrate
+bool msubInf;           // whether msub is infinite (perfectly reflecting surface)
 double hsub;            // height of particle center above surface
 /* Propagation (phase) directions of secondary incident beams above (A) and below (B) the surface (unit vectors)
  * When msub is complex, one of this doesn't tell the complete story, since the corresponding wave is inhomogeneous,
diff --git a/src/vars.h b/src/vars.h
index 3bde1cc5..9ba17be0 100644
--- a/src/vars.h
+++ b/src/vars.h
@@ -85,11 +85,10 @@ extern time_t wt_start,last_chp_wt;
 extern TIME_TYPE Timing_EField,Timing_FileIO,Timing_Integration,tstart_main;
 
 // related to a nearby surface
-extern bool surface;
+extern bool surface,msubInf;
 extern enum refl ReflRelation;
 extern doublecomplex msub;
-extern double hsub;
-extern double prIncRefl[3],prIncTran[3];
+extern double hsub,prIncRefl[3],prIncTran[3];
 
 #ifndef SPARSE //These variables are exclusive to the FFT mode
 
diff --git a/tests/2exec/comp2exec b/tests/2exec/comp2exec
index 834eb4e0..5cc09b88 100755
--- a/tests/2exec/comp2exec
+++ b/tests/2exec/comp2exec
@@ -81,7 +81,7 @@ fi
 
 if [ -n "$SURF_STANDARD" ]; then
   DEFSUITE=suite_surf
-  EXECTEST="$EXECTEST -surf 1 0 10 -yz"
+  EXECTEST="$EXECTEST -surf 10 1 0 -yz"
 fi
 
 SUITEFILE=${2:-$DEFSUITE}