Created
September 7, 2018 23:11
-
-
Save klintan/4f9f94b2fd695240ae891b89e5a3789b to your computer and use it in GitHub Desktop.
Truthfinder 5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def compute_confidence_score_with_similarity(data, confidence, attribute_key):
    '''
    Adjust each claim's confidence score by its similarity to the other claims.

    For every distinct fact in ``data[attribute_key]`` the adjusted score is

        (1 - SIMILARITY_CONSTANT) * own_confidence
            + SIMILARITY_CONSTANT * sum(implicates(fact, facts) * confidences)

    and it is written back to the ``'confidence'`` column of every row that
    holds that fact.

    Args:
        data: pandas DataFrame with an ``attribute_key`` column and a
            ``'confidence'`` column. It is modified in place.
        confidence: Not used by this function; kept so existing callers
            keep working.
        attribute_key: Name of the column that holds the facts.

    Returns:
        Tuple ``(data, new_facts_array)``: the updated DataFrame and a float
        array with the adjusted confidence of each distinct fact, in order of
        first appearance.
    '''
    # Distinct facts, in order of first appearance.
    facts_set = data[attribute_key].unique()
    # fact -> confidence. Keys keep first-appearance order, so position i in
    # the dict lines up with facts_set[i]; when a fact occurs on several rows
    # the confidence of the last such row is the one kept.
    facts_confidence = dict(zip(data[attribute_key], data['confidence']))
    # Snapshot of the confidences before adjustment; every adjusted score is
    # computed from these values, not from scores updated earlier in the loop.
    facts_array = np.array(list(facts_confidence.values()))
    # Separate float array for the adjusted scores (float so that results are
    # not truncated when the original confidences are integer-typed).
    new_facts_array = facts_array.astype(float)
    for i, f in enumerate(facts_set):
        # NOTE(review): implicates() is defined elsewhere; presumably it
        # returns one weight per entry of facts_confidence, in dict order.
        similarity_sum = (1 - SIMILARITY_CONSTANT) * facts_array[i] + SIMILARITY_CONSTANT * sum(
            implicates(f, facts_confidence) * facts_array)
        # Record the adjusted score; previously the returned array was never
        # updated and still contained the unadjusted confidences.
        new_facts_array[i] = similarity_sum
        # Update the confidence score of every row that states this fact.
        data.loc[data[attribute_key] == f, 'confidence'] = similarity_sum
    return (data, new_facts_array)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment