klintan · September 7, 2018 23:11
diff --git a/compute_confidence_score_with_similarity.py b/compute_confidence_score_with_similarity.py
 def compute_confidence_score_with_similarity(data, confidence, attribute_key):
     '''
     Adjust the confidence score of every claim using its similarity to the other claims.

     For each distinct fact in ``data[attribute_key]`` the adjusted score is

         (1 - SIMILARITY_CONSTANT) * own_confidence
         + SIMILARITY_CONSTANT * sum(implicates(fact, facts_confidence) * confidences)

     and it is written to the 'confidence' column of every row holding that fact.

     Parameters:
         data: pandas DataFrame with an ``attribute_key`` column and a
             'confidence' column. It is modified in place.
         confidence: not used by this function; kept so existing callers still work.
         attribute_key: name of the column that holds the facts.

     Returns:
         Tuple ``(data, new_facts_array)``: the updated frame and an array with
         the adjusted confidence of each distinct fact, in the same order as
         ``data[attribute_key].unique()``.
     '''
     # distinct facts, in order of first appearance
     facts_set = data[attribute_key].unique()
     # fact -> confidence; if a fact occurs on several rows the last row's value wins
     facts_confidence = dict(zip(data[attribute_key], data['confidence']))
     # snapshot of the current confidences (dict insertion order); every
     # adjustment below reads from this snapshot, never from updated values
     facts_array = np.array(list(facts_confidence.values()))
     # float copy that receives the adjusted values (float so that integer
     # input scores are not truncated on assignment)
     new_facts_array = facts_array.astype(float)
     for i, f in enumerate(facts_set):
         # NOTE(review): implicates() is defined elsewhere; presumably it returns
         # similarity weights aligned with facts_confidence -- confirm.
         similarity_sum = (1 - SIMILARITY_CONSTANT) * facts_array[i] + SIMILARITY_CONSTANT * sum(
             implicates(f, facts_confidence) * facts_array)
         # record the adjusted score so the returned array reflects the update
         new_facts_array[i] = similarity_sum
         # update the confidence score of every row that provides this fact
         data.loc[data[attribute_key] == f, 'confidence'] = similarity_sum
     return (data, new_facts_array)
	def compute_confidence_score_with_similarity(data, confidence, attribute_key):
	    '''
	    Adjust the confidence score of every claim using its similarity to the other claims.

	    For each distinct fact in ``data[attribute_key]`` the adjusted score is

	        (1 - SIMILARITY_CONSTANT) * own_confidence
	        + SIMILARITY_CONSTANT * sum(implicates(fact, facts_confidence) * confidences)

	    and it is written to the 'confidence' column of every row holding that fact.

	    Parameters:
	        data: pandas DataFrame with an ``attribute_key`` column and a
	            'confidence' column. It is modified in place.
	        confidence: not used by this function; kept so existing callers still work.
	        attribute_key: name of the column that holds the facts.

	    Returns:
	        Tuple ``(data, new_facts_array)``: the updated frame and an array with
	        the adjusted confidence of each distinct fact, in the same order as
	        ``data[attribute_key].unique()``.
	    '''
	    # distinct facts, in order of first appearance
	    facts_set = data[attribute_key].unique()
	    # fact -> confidence; if a fact occurs on several rows the last row's value wins
	    facts_confidence = dict(zip(data[attribute_key], data['confidence']))
	    # snapshot of the current confidences (dict insertion order); every
	    # adjustment below reads from this snapshot, never from updated values
	    facts_array = np.array(list(facts_confidence.values()))
	    # float copy that receives the adjusted values (float so that integer
	    # input scores are not truncated on assignment)
	    new_facts_array = facts_array.astype(float)
	    for i, f in enumerate(facts_set):
	        # NOTE(review): implicates() is defined elsewhere; presumably it returns
	        # similarity weights aligned with facts_confidence -- confirm.
	        similarity_sum = (1 - SIMILARITY_CONSTANT) * facts_array[i] + SIMILARITY_CONSTANT * sum(
	            implicates(f, facts_confidence) * facts_array)
	        # record the adjusted score so the returned array reflects the update
	        new_facts_array[i] = similarity_sum
	        # update the confidence score of every row that provides this fact
	        data.loc[data[attribute_key] == f, 'confidence'] = similarity_sum
	    return (data, new_facts_array)