Skip to content

Instantly share code, notes, and snippets.

@klintan
Created September 7, 2018 23:11
Show Gist options
  • Save klintan/4f9f94b2fd695240ae891b89e5a3789b to your computer and use it in GitHub Desktop.
Save klintan/4f9f94b2fd695240ae891b89e5a3789b to your computer and use it in GitHub Desktop.
Truthfinder 5
def compute_confidence_score_with_similarity(data, confidence, attribute_key):
    '''
    Adjust the confidence of every claim using the confidence of similar claims.

    For each distinct fact ``f`` found in ``data[attribute_key]`` the adjusted
    score is::

        (1 - SIMILARITY_CONSTANT) * conf(f)
            + SIMILARITY_CONSTANT * sum(implicates(f, facts) * conf(facts))

    The adjusted score is written to the ``'confidence'`` column of every row
    whose ``attribute_key`` value equals ``f``.

    Args:
        data: table of claims with an ``attribute_key`` column and a
            ``'confidence'`` column (presumably a pandas DataFrame - it is
            accessed through ``[]``, ``.unique()`` and ``.loc``).
            It is modified in place.
        confidence: not used by this function; kept so that existing callers
            keep working.
        attribute_key: name of the column that holds the fact values.

    Returns:
        A tuple ``(data, new_facts_array)`` where ``data`` is the same object
        that was passed in (with updated ``'confidence'`` values) and
        ``new_facts_array`` holds the adjusted confidence of each distinct
        fact, indexed in the same order as ``data[attribute_key].unique()``.
    '''
    # get unique facts for object (pandas returns them in order of appearance)
    facts_set = data[attribute_key].unique()
    # create fact : confidence dict; when a fact occurs in several rows the
    # last row's confidence wins while the key keeps its first-seen position,
    # so positions line up with facts_set
    facts_confidence = dict(zip(data[attribute_key], data['confidence']))
    # create an ordered confidence array (pre-update values, read-only below)
    facts_array = np.array(list(facts_confidence.values()))
    # separate copy that receives the adjusted values, so every iteration of
    # the loop still reads the original confidences from facts_array
    # NOTE(review): assumes float confidences - an integer-typed array would
    # truncate the adjusted scores on assignment
    new_facts_array = facts_array.copy()
    for i, f in enumerate(facts_set):
        # blend the fact's own confidence with the similarity-weighted
        # confidence of all facts (similarity factor here, like levenshtein)
        # presumably implicates() yields one weight per entry of
        # facts_confidence, in the same order - confirm against its definition
        similarity_sum = (1 - SIMILARITY_CONSTANT) * facts_array[i] + SIMILARITY_CONSTANT * sum(
            implicates(f, facts_confidence) * facts_array)
        # record the adjusted score so the returned array reflects the update
        new_facts_array[i] = similarity_sum
        # update the confidence score of every source that provides this fact
        data.loc[data[attribute_key] == f, 'confidence'] = similarity_sum
    return (data, new_facts_array)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment