Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metadata #132

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 80 additions & 16 deletions cmd-car-split.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"path/filepath"
"strconv"

"github.com/anjor/carlet"
commcid "github.com/filecoin-project/go-fil-commcid"
commp "github.com/filecoin-project/go-fil-commp-hashhash"
"github.com/filecoin-project/go-leb128"
Expand All @@ -28,11 +29,20 @@ import (
"github.com/rpcpool/yellowstone-faithful/carreader"
"github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode"
"github.com/rpcpool/yellowstone-faithful/iplddecoders"
splitcarfetcher "github.com/rpcpool/yellowstone-faithful/split-car-fetcher"
"github.com/urfave/cli/v2"
"gopkg.in/yaml.v2"
"k8s.io/klog/v2"
)

var CBOR_SHA256_DUMMY_CID = cid.MustParse("bafyreics5uul5lbtxslcigtoa5fkba7qgwu7cyb7ih7z6fzsh4lgfgraau")
var (
CBOR_SHA256_DUMMY_CID = cid.MustParse("bafyreics5uul5lbtxslcigtoa5fkba7qgwu7cyb7ih7z6fzsh4lgfgraau")
hdr = &car.CarHeader{
Roots: []cid.Cid{CBOR_SHA256_DUMMY_CID}, // placeholder
Version: 1,
}
hdrSize, _ = car.HeaderSize(hdr)
)

const maxLinks = 432000 / 18 // 18 subsets

Expand All @@ -48,7 +58,7 @@ type carFile struct {
commP cid.Cid
payloadCid cid.Cid
paddedSize uint64
fileSize int64
fileSize uint64
}
anjor marked this conversation as resolved.
Show resolved Hide resolved

func newCmd_SplitCar() *cli.Command {
Expand Down Expand Up @@ -117,7 +127,7 @@ func newCmd_SplitCar() *cli.Command {
}

epoch := c.Int("epoch")
maxFileSize := c.Int64("size")
maxFileSize := uint64(c.Int64("size"))
outputDir := c.String("output-dir")
meta := c.String("metadata")

Expand All @@ -128,14 +138,15 @@ func newCmd_SplitCar() *cli.Command {
cp := new(commp.Calc)

var (
currentFileSize int64
currentFileSize uint64
currentFileNum int
currentFile *os.File
bufferedWriter *bufio.Writer
currentSubsetInfo subsetInfo
subsetLinks []datamodel.Link
writer io.Writer
carFiles []carFile
metadata *splitcarfetcher.Metadata
)

createNewFile := func() error {
Expand All @@ -157,9 +168,25 @@ func newCmd_SplitCar() *cli.Command {
return fmt.Errorf("failed to calculate commitment to cid: %w", err)
}

cf := carFile{name: fmt.Sprintf("epoch-%d-%d.car", epoch, currentFileNum), commP: commCid, payloadCid: sl.(cidlink.Link).Cid, paddedSize: ps, fileSize: currentFileSize}
cf := carFile{
name: fmt.Sprintf("epoch-%d-%d.car", epoch, currentFileNum),
commP: commCid,
payloadCid: sl.(cidlink.Link).Cid,
paddedSize: ps,
fileSize: currentFileSize,
}
carFiles = append(carFiles, cf)

metadata.CarPieces.CarPieces = append(
metadata.CarPieces.CarPieces,
carlet.CarFile{
Name: currentSubsetInfo.fileName,
ContentSize: currentFileSize - hdrSize,
HeaderSize: hdrSize,
CommP: commCid,
PaddedSize: ps,
})

err = closeFile(bufferedWriter, currentFile)
if err != nil {
return fmt.Errorf("failed to close file: %w", err)
Expand All @@ -183,17 +210,12 @@ func newCmd_SplitCar() *cli.Command {
bufferedWriter = bufio.NewWriter(currentFile)
writer = io.MultiWriter(bufferedWriter, cp)

// Write the header
hdr := car.CarHeader{
Roots: []cid.Cid{CBOR_SHA256_DUMMY_CID}, // placeholder
Version: 1,
}
if err := car.WriteHeader(&hdr, writer); err != nil {
if err := car.WriteHeader(hdr, writer); err != nil {
return fmt.Errorf("failed to write header: %w", err)
}

// Set the currentFileSize to the size of the header
currentFileSize = int64(len(nulRootCarHeader))
currentFileSize = uint64(len(nulRootCarHeader))
currentSubsetInfo = subsetInfo{fileName: filename, firstSlot: -1, lastSlot: -1}
return nil
}
Expand All @@ -203,7 +225,7 @@ func newCmd_SplitCar() *cli.Command {
if err != nil {
return fmt.Errorf("failed to write object to car file: %s, error: %w", currentFile.Name(), err)
}
currentFileSize += int64(len(data))
currentFileSize += uint64(len(data))
return nil
}

Expand Down Expand Up @@ -238,7 +260,7 @@ func newCmd_SplitCar() *cli.Command {
dagSize += owm.RawSectionSize()
}

if currentFile == nil || currentFileSize+int64(dagSize) > maxFileSize || len(currentSubsetInfo.blockLinks) > maxLinks {
if currentFile == nil || currentFileSize+uint64(dagSize) > maxFileSize || len(currentSubsetInfo.blockLinks) > maxLinks {
err := createNewFile()
if err != nil {
return fmt.Errorf("failed to create a new file: %w", err)
Expand Down Expand Up @@ -311,8 +333,23 @@ func newCmd_SplitCar() *cli.Command {
return fmt.Errorf("failed to calculate commitment to cid: %w", err)
}

cf := carFile{name: fmt.Sprintf("epoch-%d-%d.car", epoch, currentFileNum), commP: commCid, payloadCid: sl.(cidlink.Link).Cid, paddedSize: ps, fileSize: currentFileSize}
cf := carFile{
name: fmt.Sprintf("epoch-%d-%d.car", epoch, currentFileNum),
commP: commCid,
payloadCid: sl.(cidlink.Link).Cid,
paddedSize: ps,
fileSize: currentFileSize,
}

carFiles = append(carFiles, cf)
metadata.CarPieces.CarPieces = append(
metadata.CarPieces.CarPieces,
carlet.CarFile{
Name: currentSubsetInfo.fileName,
ContentSize: currentFileSize - hdrSize,
HeaderSize: hdrSize,
CommP: commCid,
})

err = closeFile(bufferedWriter, currentFile)
if err != nil {
Expand Down Expand Up @@ -342,8 +379,16 @@ func newCmd_SplitCar() *cli.Command {
c.commP.String(),
c.payloadCid.String(),
strconv.FormatUint(c.paddedSize, 10),
strconv.FormatInt(c.fileSize, 10),
strconv.FormatUint(c.fileSize, 10),
})
if err != nil {
return fmt.Errorf("failed to write metatadata csv: %w", err)
}
}

err = writeMetadata(metadata, epoch)
if err != nil {
return fmt.Errorf("failed to write metatadata yaml: %w", err)
}

return nil
Expand Down Expand Up @@ -419,3 +464,22 @@ func writeNode(node datamodel.Node, w io.Writer) (cid.Cid, error) {
}
return cd, nil
}

func writeMetadata(metadata *splitcarfetcher.Metadata, epoch int) error {
metadataFileName := fmt.Sprintf("epoch-%d-metadata.yaml", epoch)

// Open file in append mode
metadataFile, err := os.OpenFile(metadataFileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("failed to open metadata file: %w", err)
}
defer metadataFile.Close()

encoder := yaml.NewEncoder(metadataFile)
err = encoder.Encode(metadata)
if err != nil {
return fmt.Errorf("failed to encode metadata: %w", err)
}

return nil
}
Loading