-
Notifications
You must be signed in to change notification settings - Fork 16
/
index-sig-to-cid.go
310 lines (271 loc) · 8.12 KB
/
index-sig-to-cid.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
package main
import (
"context"
"fmt"
"os"
"path/filepath"
"time"
"github.com/dustin/go-humanize"
bin "github.com/gagliardetto/binary"
"github.com/gagliardetto/solana-go"
"github.com/ipfs/go-cid"
carv2 "github.com/ipld/go-car/v2"
"github.com/rpcpool/yellowstone-faithful/bucketteer"
"github.com/rpcpool/yellowstone-faithful/indexes"
"github.com/rpcpool/yellowstone-faithful/indexmeta"
"github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode"
"k8s.io/klog/v2"
)
// CreateIndex_sig2cid creates an index file that maps transaction signatures
// to the CIDs of their Transaction nodes in the CAR file at carPath.
//
// The CAR file must exist and contain exactly one root CID. The index is built
// inside a fresh, timestamped subdirectory of tmpDir and then sealed with
// indexDir as the destination. On success it returns the file path reported by
// the index writer.
func CreateIndex_sig2cid(
	ctx context.Context,
	epoch uint64,
	network indexes.Network,
	tmpDir string,
	carPath string,
	indexDir string,
) (string, error) {
	// Check if the CAR file exists:
	exists, err := fileExists(carPath)
	if err != nil {
		return "", fmt.Errorf("failed to check if CAR file exists: %w", err)
	}
	if !exists {
		return "", fmt.Errorf("CAR file %q does not exist", carPath)
	}

	cr, err := carv2.OpenReader(carPath)
	if err != nil {
		return "", fmt.Errorf("failed to open CAR file: %w", err)
	}
	// Release the CAR file handle on every return path; it was previously leaked.
	defer cr.Close()

	// There should be only one root CID in the CAR file.
	roots, err := cr.Roots()
	if err != nil {
		return "", fmt.Errorf("failed to get roots: %w", err)
	}
	if len(roots) != 1 {
		return "", fmt.Errorf("CAR file has %d roots, expected 1", len(roots))
	}
	rootCid := roots[0]

	// TODO: use another way to precisely count the number of solana Blocks in the CAR file.
	klog.Infof("Counting items in car file...")
	numItems, err := carCountItems(carPath)
	if err != nil {
		return "", fmt.Errorf("failed to count items in car file: %w", err)
	}
	klog.Infof("Found %s items in car file", humanize.Comma(int64(numItems)))

	// Use a unique, timestamped working directory so concurrent or repeated
	// runs do not share temporary files.
	tmpDir = filepath.Join(tmpDir, "index-sig-to-cid-"+time.Now().Format("20060102-150405.000000000"))
	if err = os.MkdirAll(tmpDir, 0o755); err != nil {
		return "", fmt.Errorf("failed to create tmp dir: %w", err)
	}

	klog.Infof("Creating builder with %d items", numItems)
	sig2c, err := indexes.NewWriter_SigToCid(
		epoch,
		rootCid,
		network,
		tmpDir,
		numItems, // TODO: what if the number of real items is less than this?
	)
	if err != nil {
		return "", fmt.Errorf("failed to open index store: %w", err)
	}
	defer sig2c.Close()

	numItemsIndexed := uint64(0)
	klog.Infof("Indexing...")

	dr, err := cr.DataReader()
	if err != nil {
		return "", fmt.Errorf("failed to get data reader: %w", err)
	}

	// Iterate over all Transactions in the CAR file and put them into the index,
	// using the transaction signature as the key and the CID as the value.
	err = FindTransactions(
		ctx,
		dr,
		func(c cid.Cid, txNode *ipldbindcode.Transaction) error {
			sig, err := readFirstSignature(txNode.Data.Bytes())
			if err != nil {
				return fmt.Errorf("failed to read signature: %w", err)
			}

			if err = sig2c.Put(sig, c); err != nil {
				return fmt.Errorf("failed to put sig to cid: %w", err)
			}

			// Emit a progress dot every 100k indexed transactions.
			numItemsIndexed++
			if numItemsIndexed%100_000 == 0 {
				printToStderr(".")
			}
			return nil
		})
	if err != nil {
		return "", fmt.Errorf("failed to index; error while iterating over blocks: %w", err)
	}

	klog.Infof("Sealing index...")
	if err = sig2c.Seal(ctx, indexDir); err != nil {
		return "", fmt.Errorf("failed to seal index: %w", err)
	}

	indexFilePath := sig2c.GetFilepath()
	klog.Infof("Index created at %s; %d items indexed", indexFilePath, numItemsIndexed)
	return indexFilePath, nil
}
// VerifyIndex_sig2cid verifies that the sig-to-cid index at indexFilePath is
// correct for the CAR file at carPath.
//
// For each Transaction yielded by FindTransactions it reads the transaction's
// first signature, looks that signature up in the index, and compares the CID
// stored there with the CID of the Transaction node. It returns an error on the
// first signature that cannot be found or that maps to a different CID.
//
// Both files must exist, and the CAR file must contain exactly one root CID.
func VerifyIndex_sig2cid(ctx context.Context, carPath string, indexFilePath string) error {
	// Check if the CAR file exists:
	exists, err := fileExists(carPath)
	if err != nil {
		return fmt.Errorf("failed to check if CAR file exists: %w", err)
	}
	if !exists {
		return fmt.Errorf("CAR file %s does not exist", carPath)
	}

	// Check if the index file exists:
	exists, err = fileExists(indexFilePath)
	if err != nil {
		return fmt.Errorf("failed to check if index file exists: %w", err)
	}
	if !exists {
		return fmt.Errorf("index file %s does not exist", indexFilePath)
	}

	cr, err := carv2.OpenReader(carPath)
	if err != nil {
		return fmt.Errorf("failed to open CAR file: %w", err)
	}
	// Release the CAR file handle on every return path; it was previously leaked.
	defer cr.Close()

	// There should be only one root CID in the CAR file.
	roots, err := cr.Roots()
	if err != nil {
		return fmt.Errorf("failed to get roots: %w", err)
	}
	if len(roots) != 1 {
		return fmt.Errorf("CAR file has %d roots, expected 1", len(roots))
	}

	sig2c, err := indexes.Open_SigToCid(indexFilePath)
	if err != nil {
		return fmt.Errorf("failed to open index: %w", err)
	}
	// Release the index file once verification finishes or fails.
	defer sig2c.Close()

	dr, err := cr.DataReader()
	if err != nil {
		return fmt.Errorf("failed to get data reader: %w", err)
	}

	numItems := uint64(0)
	err = FindTransactions(
		ctx,
		dr,
		func(c cid.Cid, txNode *ipldbindcode.Transaction) error {
			sig, err := readFirstSignature(txNode.Data.Bytes())
			if err != nil {
				return fmt.Errorf("failed to read signature: %w", err)
			}

			got, err := sig2c.Get(sig)
			if err != nil {
				return fmt.Errorf("failed to find cid from signature: %w", err)
			}
			if !got.Equals(c) {
				return fmt.Errorf("sig %s: expected cid %s, got %s", sig, c, got)
			}

			// Emit a progress dot every 100k verified transactions.
			numItems++
			if numItems%100_000 == 0 {
				printToStderr(".")
			}
			return nil
		})
	if err != nil {
		return fmt.Errorf("failed to verify index; error while iterating over blocks: %w", err)
	}
	return nil
}
// VerifyIndex_sigExists verifies that the sig-exists index at indexFilePath is
// correct for the CAR file at carPath.
//
// It first checks that the root CID recorded in the index metadata equals the
// single root CID of the CAR file. Then, for each Transaction yielded by
// FindTransactions, it reads the transaction's first signature and checks that
// the index reports it as present. It returns an error on the first signature
// that the index does not contain.
//
// Both files must exist, and the CAR file must contain exactly one root CID.
func VerifyIndex_sigExists(ctx context.Context, carPath string, indexFilePath string) error {
	// Check if the CAR file exists:
	exists, err := fileExists(carPath)
	if err != nil {
		return fmt.Errorf("failed to check if CAR file exists: %w", err)
	}
	if !exists {
		return fmt.Errorf("CAR file %s does not exist", carPath)
	}

	// Check if the index file exists:
	exists, err = fileExists(indexFilePath)
	if err != nil {
		return fmt.Errorf("failed to check if index file exists: %w", err)
	}
	if !exists {
		return fmt.Errorf("index file %s does not exist", indexFilePath)
	}

	cr, err := carv2.OpenReader(carPath)
	if err != nil {
		return fmt.Errorf("failed to open CAR file: %w", err)
	}
	// Release the CAR file handle on every return path; it was previously leaked.
	defer cr.Close()

	// There should be only one root CID in the CAR file.
	roots, err := cr.Roots()
	if err != nil {
		return fmt.Errorf("failed to get roots: %w", err)
	}
	if len(roots) != 1 {
		return fmt.Errorf("CAR file has %d roots, expected 1", len(roots))
	}

	sigExists, err := bucketteer.Open(indexFilePath)
	if err != nil {
		return fmt.Errorf("failed to open index: %w", err)
	}
	// Release the index file once verification finishes or fails.
	defer sigExists.Close()

	// The index must have been built from this very CAR file: compare the
	// root CID stored in the index metadata with the CAR file's root.
	rootCID := roots[0]
	storedRootCid, ok := sigExists.Meta().GetCid(indexmeta.MetadataKey_RootCid)
	if !ok {
		return fmt.Errorf("index file does not have a root cid meta")
	}
	if !rootCID.Equals(storedRootCid) {
		return fmt.Errorf("root CID mismatch: expected %s, got %s", rootCID, storedRootCid)
	}

	dr, err := cr.DataReader()
	if err != nil {
		return fmt.Errorf("failed to get data reader: %w", err)
	}

	numItems := uint64(0)
	err = FindTransactions(
		ctx,
		dr,
		func(c cid.Cid, txNode *ipldbindcode.Transaction) error {
			sig, err := readFirstSignature(txNode.Data.Bytes())
			if err != nil {
				return fmt.Errorf("failed to read signature: %w", err)
			}

			got, err := sigExists.Has(sig)
			if err != nil {
				return fmt.Errorf("failed to check if sig exists: %w", err)
			}
			if !got {
				return fmt.Errorf("sig %s: expected to exist, but it does not", sig)
			}

			// Emit a progress dot every 100k verified transactions.
			numItems++
			if numItems%100_000 == 0 {
				printToStderr(".")
			}
			return nil
		})
	if err != nil {
		return fmt.Errorf("failed to verify index; error while iterating over blocks: %w", err)
	}
	return nil
}
// readFirstSignature decodes the leading signature from buf.
//
// It reads a compact-u16 signature count from the start of buf and, when that
// count is non-zero, the signature bytes that follow it. A decoding failure, a
// zero count, or a read of anything other than 64 bytes yields an error
// together with the zero-value signature.
func readFirstSignature(buf []byte) (solana.Signature, error) {
	// Zero value handed back alongside every error.
	var none solana.Signature

	dec := bin.NewCompactU16Decoder(buf)
	count, err := dec.ReadCompactU16()
	switch {
	case err != nil:
		return none, err
	case count == 0:
		return none, fmt.Errorf("no signatures")
	}

	// Only the first signature is needed; any further ones are left unread.
	var first solana.Signature
	n, err := dec.Read(first[:])
	if err != nil {
		return none, err
	}
	if n != 64 {
		return none, fmt.Errorf("unexpected signature length %d", n)
	}
	return first, nil
}