-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathxy.cu
169 lines (126 loc) · 5.32 KB
/
xy.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
/*!
\file xy.cu
\brief Functions to generate Hamiltonians for the XY model
*/
#include "hamiltonian.h"
__device__ float HOffBondXXY(const int si, const int bra, const float JJ)
{
float valH;
//int S0, S1;
//int T0, T1;
valH = JJ*0.5; //contribution from the J part of the Hamiltonian
return valH;
}
__device__ float HOffBondYXY(const int si, const int bra, const float JJ)
{
float valH;
//int S0, S1;
//int T0, T1;
valH = JJ*0.5; //contribution from the J part of the Hamiltonian
return valH;
}
__device__ float HDiagPartXY(const int bra, int latticeSize, int3* d_Bond, const float JJ)
{
return 0.f;
}//HdiagPart
__global__ void FillDiagonalsXY(int* d_basis, f_hamiltonian H, int* d_Bond, parameters data)
{
int row = blockIdx.x*blockDim.x + threadIdx.x;
H.vals[row] = 0.f;
H.rows[row] = 2*H.sectorDim;
H.cols[row] = 2*H.sectorDim;
H.set[row] = 0;
}
/* Function FillSparse: this function takes the empty Hamiltonian arrays and fills them up. Each thread in x handles one ket |i>, and each thread in y handles one site T0
Inputs: d_basisPosition - position information about the basis
d_basis - other basis infos
d_dim - the number of kets
H_sort - an array that will store the Hamiltonian
d_Bond - the bond information
d_latticeSize - the number of lattice sites
JJ - the coupling parameter
*/
__global__ void FillSparseXY(int* d_basisPosition, int* d_basis, f_hamiltonian H, int* d_Bond, parameters data, int offset)
{
int dim = H.sectorDim;
int latticeSize = data.nsite;
int ii = (blockDim.x/(2*latticeSize))*(blockIdx.x + offset) + threadIdx.x/(2*latticeSize);
int T0 = threadIdx.x%(2*latticeSize);
#if __CUDA_ARCH__ < 200
const int arraySize = 512;
#elif __CUDA_ARCH__ >= 200
const int arraySize = 1024;
#else
#error Could not detect GPU architecture
#endif
__shared__ int3 tempBond[32];
int count;
__shared__ int tempPos[arraySize];
__shared__ float tempVal[arraySize];
//__shared__ uint tempi[arraySize];
unsigned int tempi;
__shared__ unsigned int tempod[arraySize];
int stride = 4*latticeSize;
//int tempcount;
int site = T0%(latticeSize);
count = 0;
int rowTemp;
int braSector;
int start = (bool)(dim%arraySize) ? (dim/arraySize + 1)*arraySize : dim/arraySize;
int s;
//int si, sj;//sk,sl; //spin operators
//unsigned int tempi;// tempod; //tempj;
//cuDoubleComplex tempD;
__syncthreads();
bool compare;
if( ii < dim )
{
if (T0 < 2*latticeSize)
{
tempi = d_basis[ii];
//Putting bond info in shared memory
(tempBond[site]).x = d_Bond[site];
(tempBond[site]).y = d_Bond[latticeSize + site];
(tempBond[site]).z = d_Bond[2*latticeSize + site];
__syncthreads();
//Horizontal bond ---------------
s = (tempBond[site]).x;
tempod[threadIdx.x] = tempi;
braSector = (tempi & (1 << s)) >> s;
tempod[threadIdx.x] ^= (1<<s);
s = (tempBond[site]).y;
braSector ^= (tempi & (1 << s)) >> s;
tempod[threadIdx.x] ^= (1<<s);
compare = (d_basisPosition[tempod[threadIdx.x]] != -1) && braSector;
compare &= (d_basisPosition[tempod[threadIdx.x]] > ii);
tempPos[threadIdx.x] = (compare) ? d_basisPosition[tempod[threadIdx.x]] : dim;
tempVal[threadIdx.x] = HOffBondXXY(site, tempi, data.J1);
count += (int)compare;
rowTemp = (T0/latticeSize) ? ii : tempPos[threadIdx.x];
rowTemp = (compare) ? rowTemp : 2*dim;
H.vals[ ii*stride + 4*site + (T0/latticeSize)+ start ] = tempVal[threadIdx.x]; //(T0/latticeSize) ? tempVal[threadIdx.x] : cuConj(tempVal[threadIdx.x]);
H.cols[ ii*stride + 4*site + (T0/latticeSize) + start ] = (T0/latticeSize) ? tempPos[threadIdx.x] : ii;
H.rows[ ii*stride + 4*site + (T0/latticeSize) + start ] = rowTemp;
H.set[ ii*stride + 4*site + (T0/latticeSize) + start ] = (int)compare;
//Vertical bond -----------------
s = (tempBond[site]).x;
tempod[threadIdx.x] = tempi;
braSector = (tempi & (1 << s)) >> s;
tempod[threadIdx.x] ^= (1<<s);
s = (tempBond[site]).z;
braSector ^= (tempi & (1 << s)) >> s;
tempod[threadIdx.x] ^= (1<<s);
compare = (d_basisPosition[tempod[threadIdx.x]] != -1) && braSector;
compare &= (d_basisPosition[tempod[threadIdx.x]] > ii);
tempPos[threadIdx.x] = (compare) ? d_basisPosition[tempod[threadIdx.x]] : dim;
tempVal[threadIdx.x] = HOffBondYXY(site,tempi, data.J1);
count += (int)compare;
rowTemp = (T0/latticeSize) ? ii : tempPos[threadIdx.x];
rowTemp = (compare) ? rowTemp : 2*dim;
H.vals[ ii*stride + 4*site + 2 + (T0/latticeSize) + start ] = tempVal[threadIdx.x]; // (T0/latticeSize) ? tempVal[threadIdx.x] : cuConj(tempVal[threadIdx.x]);
H.cols[ ii*stride + 4*site + 2 + (T0/latticeSize) + start ] = (T0/latticeSize) ? tempPos[threadIdx.x] : ii;
H.rows[ ii*stride + 4*site + 2 + (T0/latticeSize) + start ] = rowTemp;
H.set[ ii*stride + 4*site + 2 + (T0/latticeSize) + start ] = (int)compare;
}
}//end of ii
}//end of FillSparse