Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created September 15, 2011 21:47
Show Gist options
  • Select an option

  • Save marcelcaraciolo/1220584 to your computer and use it in GitHub Desktop.

Select an option

Save marcelcaraciolo/1220584 to your computer and use it in GitHub Desktop.
bm25f
# BM25F Model
def bm25(idf, tf, fl, avgfl, B, K1):
    """Return the BM25 score of one term for one document field.

    Computes::

        idf * (tf * (K1 + 1)) / (tf + K1 * (1 - B + B * fl / avgfl))

    :param idf: inverse document frequency of the term.
    :param tf: term frequency in the current document.
    :param fl: field length in the current document.
    :param avgfl: average field length across documents in the collection.
        A falsy value (``0`` or ``None``) is treated as ``1`` so the length
        ratio stays defined; ``BM25FScorer`` applies the same fallback.
    :param B: free parameter, see the BM25 literature. In the formula above
        it weights the ``fl / avgfl`` length ratio.
    :param K1: free parameter, see the BM25 literature.
    :returns: the score as a number (a float for numeric inputs).
    """
    # Guard against an empty collection reporting an average length of 0.
    avgfl = avgfl or 1
    # Force true division: with two ints, Python 2 would truncate the ratio.
    length_ratio = float(fl) / avgfl
    length_norm = 1 - B + B * length_ratio
    return idf * ((tf * (K1 + 1)) / (tf + K1 * length_norm))
class BM25F(WeightingModel):
    """Implements the BM25F scoring algorithm.

    Holds a default ``B`` and ``K1`` plus optional per-field overrides for
    ``B``, and hands out a scorer configured with the values that apply to
    the requested field.
    """

    def __init__(self, B=0.75, K1=1.2, **kwargs):
        """
        >>> from whoosh import scoring
        >>> # Set a custom B value for the "content" field
        >>> w = scoring.BM25F(B=0.75, content_B=1.0, K1=1.5)

        :param B: free parameter, see the BM25 literature. Keyword arguments
            of the form ``fieldname_B`` (for example, ``body_B``) set field-
            specific values for B.
        :param K1: free parameter, see the BM25 literature.
        """
        self.B = B
        self.K1 = K1
        # Map field name -> B override, taken from ``<fieldname>_B`` keyword
        # arguments. Keyword arguments without the ``_B`` suffix are ignored.
        self._field_B = {
            key[:-2]: value
            for key, value in iteritems(kwargs)
            if key.endswith("_B")
        }

    def supports_block_quality(self):
        """Always return True."""
        return True

    def scorer(self, searcher, fieldname, text, qf=1):
        """Return a scorer for ``text`` in ``fieldname``.

        :param searcher: searcher whose schema is consulted and which is
            passed on to the scorer.
        :param fieldname: name of the field being scored.
        :param text: the term text being scored.
        :param qf: passed through unchanged to ``BM25FScorer``.
        :returns: ``WeightScorer.for_(searcher, fieldname, text)`` when the
            field is not scorable; otherwise a ``BM25FScorer`` using the
            field-specific B if one was configured, else the default B.
        """
        if not searcher.schema[fieldname].scorable:
            return WeightScorer.for_(searcher, fieldname, text)

        B = self._field_B.get(fieldname, self.B)
        return BM25FScorer(searcher, fieldname, text, B, self.K1, qf=qf)
class BM25FScorer(WeightLengthScorer):
    """Scorer that applies :func:`bm25` to a term's weight and field length."""

    def __init__(self, searcher, fieldname, text, B, K1, qf=1):
        """Collect the statistics needed to score ``text`` in ``fieldname``.

        :param searcher: searcher to score against; its parent (if any)
            supplies the collection-wide statistics.
        :param fieldname: name of the field being scored.
        :param text: the term text being scored.
        :param B: free parameter passed to :func:`bm25`.
        :param K1: free parameter passed to :func:`bm25`.
        :param qf: stored on the instance as ``self.qf``; not used by
            ``_score`` in this class.
        """
        # IDF and average field length are global statistics, so get them from
        # the top-level searcher
        parent = searcher.get_parent()  # Returns self if no parent
        self.idf = parent.idf(fieldname, text)
        # Fall back to 1 when the average length is 0/None so the length
        # ratio computed in bm25() never divides by zero.
        self.avgfl = parent.avg_field_length(fieldname) or 1
        self.B = B
        self.K1 = K1
        self.qf = qf
        # NOTE(review): setup() is not defined in this class; presumably it
        # is inherited from WeightLengthScorer -- confirm.
        self.setup(searcher, fieldname, text)

    def _score(self, weight, length):
        """Return the BM25 score for a term weight and field length.

        ``weight`` is used as the term frequency and ``length`` as the field
        length in the :func:`bm25` formula.
        """
        return bm25(self.idf, weight, length, self.avgfl, self.B, self.K1)
@amirouche
Copy link
Copy Markdown

Thanks!

@amirouche
Copy link
Copy Markdown

How do you use boost value on fields?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment