beamds.beam.similarity package#

Submodules#

beamds.beam.similarity.config module#

class beamds.beam.similarity.config.SimilarityConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#

Bases: BeamConfig

parameters = [BeamParam(name='vector_dimension', type=<class 'int'>, default=None, help='dimension of the vectors', tags=None), BeamParam(name='expected_population', type=<class 'int'>, default=1000000, help='expected population of the index', tags=None), BeamParam(name='metric', type=<class 'str'>, default='l2', help='distance metric [l2, cosine, max, prod, quantile]', tags=None), BeamParam(name='training_device', type=<class 'str'>, default='cpu', help='device for training', tags=None), BeamParam(name='inference_device', type=<class 'str'>, default='cpu', help='device for inference', tags=None), BeamParam(name='dense_model_device', type=<class 'str'>, default='cuda', help='device for dense model', tags=None), BeamParam(name='ram_footprint', type=<class 'int'>, default=256000000000, help='RAM footprint', tags=None), BeamParam(name='gpu_footprint', type=<class 'int'>, default=24000000000, help='GPU footprint', tags=None), BeamParam(name='exact', type=<class 'bool'>, default=False, help='exact search', tags=None), BeamParam(name='nlists', type=<class 'int'>, default=None, help='number of lists for IVF', tags=None), BeamParam(name='faiss_M', type=<class 'int'>, default=None, help='M for IVFPQ', tags=None), BeamParam(name='reducer', type=<class 'str'>, default='umap', help='dimensionality reduction method', tags=None), BeamParam(name='quantile', type=<class 'float'>, default=0.9, help='quantile for the quantile metric', tags=None)]#
class beamds.beam.similarity.config.TFIDFConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#

Bases: TransformerConfig

defaults = {'metric': 'bm25'}#
parameters = [BeamParam(name='max_features', type=<class 'int'>, default=None, help='maximum number of features', tags=None), BeamParam(name='max_df', type=<class 'float'>, default=0.95, help='maximum document frequency', tags=None), BeamParam(name='min_df', type=<class 'float'>, default=2, help='minimum document frequency', tags=None), BeamParam(name='use_idf', type=<class 'bool'>, default=True, help='use inverse document frequency', tags=None), BeamParam(name='smooth_idf', type=<class 'bool'>, default=True, help='smooth inverse document frequency', tags=None), BeamParam(name='sublinear_tf', type=<class 'bool'>, default=False, help='apply sublinear term frequency scaling', tags=None), BeamParam(name='sparse_framework', type=<class 'str'>, default='torch', help='sparse framework, can be "torch" or "scipy"', tags=None), BeamParam(name='sparse_layout', type=<class 'str'>, default='coo', help='sparse layout, can be "coo" or "csr"', tags=None), BeamParam(name='norm', type=<class 'str'>, default='l2', help='Each output row will have unit norm, either [l1, l2, none]', tags=None), BeamParam(name='bm25_k1', type=<class 'float'>, default=1.5, help='bm25 k1 parameter', tags=None), BeamParam(name='bm25_b', type=<class 'float'>, default=0.75, help='bm25 b parameter', tags=None), BeamParam(name='bm25_epsilon', type=<class 'float'>, default=0.25, help='bm25 epsilon parameter', tags=None)]#

beamds.beam.similarity.core module#

class beamds.beam.similarity.core.BeamSimilarity(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#

Bases: Processor

add(x, index=None, **kwargs)[source]#
add_index(x, index=None)[source]#
static extract_data_and_index(x, index=None, convert_to='numpy')[source]#
get_index(index)[source]#
property is_trained#
property metric_type#
property ntotal#
reconstruct(id0)[source]#
reconstruct_n(id0, id1)[source]#
remove_ids(ids)[source]#
reset()[source]#
search(x, k=1) → Similarities[source]#
train(x)[source]#
class beamds.beam.similarity.core.Similarities(index: Any, distance: Any, sparse_scores: Any = None, metric: str = None, model: str = None)[source]#

Bases: object

distance: Any#
index: Any#
metric: str = None#
model: str = None#
sparse_scores: Any = None#

beamds.beam.similarity.dense module#

beamds.beam.similarity.sparnn module#

beamds.beam.similarity.sparse module#

beamds.beam.similarity.text module#

beamds.beam.similarity.tfidf module#

Module contents#