Skip to content

Commit

Permalink
Changing dependency for bench_fw to *_cpu instead of *_gpu (facebookr…
Browse files Browse the repository at this point in the history
…esearch#3889)

Summary:
Pull Request resolved: facebookresearch#3889

1. Changing the dependency for bench_fw to *_cpu instead of *_gpu
 - faiss_gpu and torch are currently incompatible. Once that is fixed, I'll add the gpu dependency back.
 - Today, we are not using gpu in benchmarking yet.

2. Fixing a naming issue in kmeans, which is used when opaque is set to false in assemble.
3. Deriving codec_name when it is not assigned explicitly; this happens when using assemble.

Reviewed By: satymish

Differential Revision: D62671870

fbshipit-source-id: 4a4ecfeef948c99fffba407cbf69d2349544bdfd
  • Loading branch information
kuarora authored and facebook-github-bot committed Sep 25, 2024
1 parent c8d1474 commit d8aec60
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 22 deletions.
4 changes: 3 additions & 1 deletion benchs/bench_fw/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from statistics import mean, median
from typing import Any, Dict, List, Optional

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

import numpy as np

Expand Down Expand Up @@ -214,6 +214,7 @@ def set_io(self, benchmark_io: BenchmarkIO):
@dataclass
class TrainOperator(IndexOperator):
codec_descs: List[CodecDescriptor] = field(default_factory=lambda: [])
assemble_opaque: bool = True

def get_desc(self, name: str) -> Optional[CodecDescriptor]:
for desc in self.codec_descs:
Expand Down Expand Up @@ -248,6 +249,7 @@ def build_index_wrapper(self, codec_desc: CodecDescriptor):
factory=codec_desc.factory,
training_vectors=codec_desc.training_vectors,
codec_name=codec_desc.get_name(),
assemble_opaque=self.assemble_opaque,
)
index.set_io(self.io)
codec_desc.index = index
Expand Down
4 changes: 2 additions & 2 deletions benchs/bench_fw/benchmark_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
from typing import Any, Dict, List, Optional
from zipfile import ZipFile

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

import numpy as np
import submitit
from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib
dataset_from_name,
)

Expand Down
19 changes: 13 additions & 6 deletions benchs/bench_fw/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

from .benchmark_io import BenchmarkIO
from .utils import timer

logger = logging.getLogger(__name__)


# Important: filenames end with "." and carry no extension (npy, codec, index);
# when writing files, you are required to append it yourself: filename + "npy", etc.

@dataclass
class IndexDescriptorClassic:
bucket: Optional[str] = None
Expand Down Expand Up @@ -110,21 +113,25 @@ def get_filename(
filename += "."
return filename

# Return the base name for k-means artifacts with `k` centroids: the result of
# self.get_filename() followed by "kmeans_<k>.". The returned string ends with
# "." and has no extension, so the caller appends one (e.g. + "npy").
def get_kmeans_filename(self, k):
return f"{self.get_filename()}kmeans_{k}."

def k_means(self, io, k, dry_run):
logger.info(f"k_means {k} {self}")
kmeans_vectors = DatasetDescriptor(
tablename=f"{self.get_filename()}kmeans_{k}.npy"
tablename=f"{self.get_filename()}kmeans_{k}"
)
meta_filename = kmeans_vectors.tablename + ".json"
if not io.file_exist(kmeans_vectors.tablename) or not io.file_exist(
kmeans_filename = kmeans_vectors.get_filename() + "npy"
meta_filename = kmeans_vectors.get_filename() + "json"
if not io.file_exist(kmeans_filename) or not io.file_exist(
meta_filename
):
if dry_run:
return None, None, kmeans_vectors.tablename
return None, None, kmeans_filename
x = io.get_dataset(self)
kmeans = faiss.Kmeans(d=x.shape[1], k=k, gpu=True)
_, t, _ = timer("k_means", lambda: kmeans.train(x))
io.write_nparray(kmeans.centroids, kmeans_vectors.tablename)
io.write_nparray(kmeans.centroids, kmeans_filename)
io.write_json({"k_means_time": t}, meta_filename)
else:
t = io.read_json(meta_filename)["k_means_time"]
Expand Down
32 changes: 23 additions & 9 deletions benchs/bench_fw/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss
import numpy as np
from faiss.benchs.bench_fw.descriptors import IndexBaseDescriptor

from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
knn_intersection_measure,
OperatingPointsWithRanges,
)
from faiss.contrib.factory_tools import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.factory_tools import ( # @manual=//faiss/contrib:faiss_contrib
reverse_index_factory,
)
from faiss.contrib.ivf_tools import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.ivf_tools import ( # @manual=//faiss/contrib:faiss_contrib
add_preassigned,
replace_ivf_quantizer,
)
Expand Down Expand Up @@ -635,11 +635,12 @@ def get_index_name(self) -> Optional[str]:

def fetch_index(self):
# read index from file if it is already available
index_filename = None
if self.index_path:
index_filename = os.path.basename(self.index_path)
else:
elif self.index_name:
index_filename = self.index_name + "index"
if self.io.file_exist(index_filename):
if index_filename and self.io.file_exist(index_filename):
if self.index_path:
index = self.io.read_index(
index_filename,
Expand Down Expand Up @@ -681,7 +682,7 @@ def fetch_index(self):
)
assert index.ntotal == xb.shape[0] or index_ivf.ntotal == xb.shape[0]
logger.info("Added vectors to index")
if self.serialize_full_index:
if self.serialize_full_index and index_filename:
codec_size = self.io.write_index(index, index_filename)
assert codec_size is not None

Expand Down Expand Up @@ -908,6 +909,7 @@ def get_codec(self):
class IndexFromFactory(Index):
factory: Optional[str] = None
training_vectors: Optional[DatasetDescriptor] = None
assemble_opaque: bool = True

def __post_init__(self):
super().__post_init__()
Expand All @@ -916,6 +918,19 @@ def __post_init__(self):
if self.factory != "Flat" and self.training_vectors is None:
raise ValueError(f"training_vectors is not set for {self.factory}")

# Return the codec name, deriving one when the parent class reports None.
#
# The derived name is built from, in order:
#   1. the factory string with "," replaced by "_", followed by ".";
#   2. "d_<d>.<METRIC>." using self.d and the upper-cased self.metric;
#   3. for any factory other than "Flat", training_vectors.get_filename("xt");
#   4. when construction_params is set, the suffix produced by
#      IndexBaseDescriptor.param_dict_list_to_name(construction_params).
# The derived name is stored on self.codec_name before being returned.
#
# NOTE(review): this diff rendering strips indentation; presumably everything
# from the first `codec_name = f"..."` line through `self.codec_name = codec_name`
# is nested under `if codec_name is None:` -- confirm against the real file.
def get_codec_name(self):
codec_name = super().get_codec_name()
if codec_name is None:
codec_name = f"{self.factory.replace(',', '_')}."
codec_name += f"d_{self.d}.{self.metric.upper()}."
if self.factory != "Flat":
# Narrows the Optional type before calling get_filename on it.
assert self.training_vectors is not None
codec_name += self.training_vectors.get_filename("xt")
if self.construction_params is not None:
codec_name += IndexBaseDescriptor.param_dict_list_to_name(self.construction_params)
self.codec_name = codec_name
return self.codec_name

def fetch_meta(self, dry_run=False):
meta_filename = self.get_codec_name() + "json"
if self.io.file_exist(meta_filename):
Expand Down Expand Up @@ -1021,14 +1036,13 @@ def get_quantizer(self, dry_run, pretransform=None):
def assemble(self, dry_run):
logger.info(f"assemble {self.factory}")
model = self.get_model()
opaque = True
t_aggregate = 0
# try:
# reverse_index_factory(model)
# opaque = False
# except NotImplementedError:
# opaque = True
if opaque:
if self.assemble_opaque:
codec = model
else:
if isinstance(model, faiss.IndexPreTransform):
Expand Down
4 changes: 2 additions & 2 deletions benchs/bench_fw/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
from dataclasses import dataclass
from typing import Dict, List, Tuple

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

# from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
# from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
# OperatingPoints,
# )

Expand Down
4 changes: 2 additions & 2 deletions benchs/bench_fw/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from multiprocessing.pool import ThreadPool
from time import perf_counter

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss
import numpy as np

from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
OperatingPoints,
)

Expand Down

0 comments on commit d8aec60

Please sign in to comment.