Created
July 15, 2020 04:55
-
-
Save bhaskara/6a740ca19d69838fc310fffbf08b07f7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from sklearn.cluster import DBSCAN
class OutlierDetector(object):
    """Detect outlier users from the policies attached to them.

    Every user is encoded as a 0/1 vector with one component per distinct
    policy seen across all users. The vectors are stacked into a matrix
    (one row per user) and handed to the outlier detector.

    Parameters
    ----------
    users : [User]
        Objects exposing a ``policies`` iterable of hashable values.
    outlier_detector : DBSCAN or IsolationForest or other outlier detector or None
        Any object providing ``fit_predict(X)``. Defaults to DBSCAN.
    """

    def __init__(self, users, outlier_detector=None):
        # Materialise once: the users are walked twice below, which would
        # silently yield an empty matrix if a generator were passed in.
        self._users = list(users)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # column layout is reproducible (set iteration order is not).
        self._policies = list(dict.fromkeys(
            pol for u in self._users for pol in u.policies))
        if outlier_detector is None:
            # On 0/1 vectors the Euclidean distance is the square root of the
            # number of differing policies, so eps=1 makes two users
            # neighbours when they differ in at most one policy.
            outlier_detector = DBSCAN(
                eps=1,
                metric="euclidean",
                min_samples=3,
                n_jobs=-1)
        self._outlier_detector = outlier_detector
        rows = [self._policy_vec(u) for u in self._users]
        # Explicit reshape keeps the matrix 2-D even with no users/policies.
        self._user_policies = np.array(rows, dtype=float).reshape(
            len(self._users), len(self._policies))

    def _policy_vec(self, user):
        """Return the 0/1 float vector marking which known policies *user* has."""
        # Build the lookup set once instead of scanning user.policies for
        # every known policy.
        held = set(user.policies)
        return np.array([pol in held for pol in self._policies], dtype=float)

    def current_outliers(self):
        """Return outliers among input users

        Returns
        -------
        cluster_vals : [int]
            One value per input user, in input order, as returned by the
            detector's ``fit_predict``. Negative values correspond to
            outlier indexes among users. Empty when there are no users.
        """
        if not self._users:
            # Nothing to cluster; avoid handing an empty matrix to the detector.
            return np.array([], dtype=int)
        return self._outlier_detector.fit_predict(self._user_policies)
# Demo: label every user, then list the ones flagged as outliers.
# NOTE(review): `users` is not defined in this snippet; it is expected to be
# provided by the surrounding code before this point.
det = OutlierDetector(users)
outliers = det.current_outliers()
print(outliers)
# A negative label marks the user at the same position as an outlier.
print([user for user, label in zip(users, outliers) if label < 0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment