klintan · January 3, 2020 21:35
diff --git a/compute_source_trust.py b/compute_source_trust.py
 def compute_source_trust(data, sources):
     '''
     Compute every source trustworthiness. The trustworthiness score is derived from the
     average confidence of all facts supplied by source w.

     For each key w of ``sources``, t(w) is the mean of the 'confidence' column over the rows
     of ``data`` whose 'source' column equals w, and the stored score is tau(w) = -ln(1 - t(w)).
     A source with no rows in ``data`` keeps its current score.

     :param data: Dataframe all facts for object O ('source' and 'confidence' columns are read)
     :param sources: dict all unique sources and current scores (updated in place)
     :return: the same dict of unique sources with updated scores
     '''
     for source in sources:
         # select the confidences of this source once instead of filtering the frame twice
         confidences = data.loc[data['source'] == source, 'confidence'].values
         if confidences.size == 0:
             # no facts from this source: there is nothing to average (it would be 0 / 0),
             # so leave the current score untouched
             continue
         # t(w) trustworthiness of website w
         # the average confidence of all facts supplied by website/source w
         t_w = sum(confidences) / len(confidences)
         # tau(w) trustworthiness score of website w
         # as explained in the paper, 1 - t(w) is usually quite small and multiplying many of them
         # might lead to underflow. Therefore we take the logarithm of it to better model how trustworthy a source is
         # NOTE: an average confidence of exactly 1 makes this the log of zero, i.e. an infinite score
         tau_w = -np.log(1 - t_w)
         # update the source score to the new score
         sources[source] = tau_w
     return sources
	def compute_source_trust(data, sources):
		'''
		Compute every source trustworthiness. The trustworthiness score is derived from the
		average confidence of all facts supplied by source w.

		For each key w of ``sources``, t(w) is the mean of the 'confidence' column over the rows
		of ``data`` whose 'source' column equals w, and the stored score is tau(w) = -ln(1 - t(w)).
		A source with no rows in ``data`` keeps its current score.

		:param data: Dataframe all facts for object O ('source' and 'confidence' columns are read)
		:param sources: dict all unique sources and current scores (updated in place)
		:return: the same dict of unique sources with updated scores
		'''
		for source in sources:
			# select the confidences of this source once instead of filtering the frame twice
			confidences = data.loc[data['source'] == source, 'confidence'].values
			if confidences.size == 0:
				# no facts from this source: there is nothing to average (it would be 0 / 0),
				# so leave the current score untouched
				continue
			# t(w) trustworthiness of website w
			# the average confidence of all facts supplied by website/source w
			t_w = sum(confidences) / len(confidences)
			# tau(w) trustworthiness score of website w
			# as explained in the paper, 1 - t(w) is usually quite small and multiplying many of them
			# might lead to underflow. Therefore we take the logarithm of it to better model how trustworthy a source is
			# NOTE: an average confidence of exactly 1 makes this the log of zero, i.e. an infinite score
			tau_w = -np.log(1 - t_w)
			# update the source score to the new score
			sources[source] = tau_w
		return sources