Skip to content

Commit

Permalink
fix: incorrect dim set when creating BM25 doc sparse array (#37717)
Browse files Browse the repository at this point in the history
issue: #35853

Signed-off-by: Buqian Zheng <[email protected]>
  • Loading branch information
zhengbuqian authored Nov 16, 2024
1 parent 3cdb485 commit 00edec2
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
4 changes: 2 additions & 2 deletions internal/storage/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -458,9 +458,9 @@ func (m *BM25Stats) Deserialize(bs []byte) error {
}

func (m *BM25Stats) BuildIDF(tf []byte) (idf []byte) {
- dim := typeutil.SparseFloatRowElementCount(tf)
+ numElements := typeutil.SparseFloatRowElementCount(tf)
  idf = make([]byte, len(tf))
- for idx := 0; idx < dim; idx++ {
+ for idx := 0; idx < numElements; idx++ {
key := typeutil.SparseFloatRowIndexAt(tf, idx)
value := typeutil.SparseFloatRowValueAt(tf, idx)
nq := m.rowsWithToken[key]
Expand Down
14 changes: 8 additions & 6 deletions internal/util/function/bm25_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (v *BM25FunctionRunner) run(data []string, dst []map[uint32]float32) error

func (v *BM25FunctionRunner) BatchRun(inputs ...any) ([]any, error) {
if len(inputs) > 1 {
- return nil, fmt.Errorf("BM25 function receieve more than one input")
+ return nil, fmt.Errorf("BM25 function received more than one input column")
}

text, ok := inputs[0].([]string)
Expand Down Expand Up @@ -158,16 +158,18 @@ func (v *BM25FunctionRunner) GetOutputFields() []*schemapb.FieldSchema {
}

func buildSparseFloatArray(mapdata []map[uint32]float32) *schemapb.SparseFloatArray {
- dim := 0
+ dim := int64(0)
  bytes := lo.Map(mapdata, func(sparseMap map[uint32]float32, _ int) []byte {
- if len(sparseMap) > dim {
- dim = len(sparseMap)
+ row := typeutil.CreateAndSortSparseFloatRow(sparseMap)
+ rowDim := typeutil.SparseFloatRowDim(row)
+ if rowDim > dim {
+ dim = rowDim
  }
- return typeutil.CreateAndSortSparseFloatRow(sparseMap)
+ return row
  })

  return &schemapb.SparseFloatArray{
  Contents: bytes,
- Dim: int64(dim),
+ Dim: dim,
}
}

0 comments on commit 00edec2

Please sign in to comment.