Skip to content

Instantly share code, notes, and snippets.

@aus10powell
Created February 4, 2020 15:04
Show Gist options
  • Save aus10powell/7edf80488e30a4d6a173854968686a1b to your computer and use it in GitHub Desktop.
Save aus10powell/7edf80488e30a4d6a173854968686a1b to your computer and use it in GitHub Desktop.
These are some code snippets to get started with Snorkel
Display the source blob
Display the rendered blob
Raw
# make sure all packages import
import re

from snorkel.labeling import labeling_function
from snorkel.labeling import PandasLFApplier
from snorkel.utils import to_int_label_array
@labeling_function()
def child_in_msg(x):
    """Vote positive when the child's name appears in the message.

    Reads the ``pat_first_in_msg`` flag of the data point: returns 1 when the
    flag equals 1, and -1 (unknown) for any other value.
    """
    if x.pat_first_in_msg == 1:
        return 1
    return -1
@labeling_function()
def child_is_older(x):
    """Label a data point from the child's age using fixed cut-offs.

    Returns:
        1 when ``x.age`` is below 15.5,
        -1 (unknown) when ``x.age`` is in [15.5, 17) or is missing (None/NaN),
        0 when ``x.age`` is 17 or above.
    """
    age = x.age
    # NaN fails every ordering comparison, so the original if/elif chain fell
    # through and implicitly returned None, which is not an integer label.
    # Abstain explicitly instead. (NaN is the only value with age != age.)
    if age is None or age != age:
        return -1
    if age < 15.5:
        return 1
    # NOTE(review): the original final test was ``16 <= age``, but ages in
    # [16, 17) were already caught by this abstain band, so 17 has always been
    # the effective cut-off. Confirm whether the band should end at 16.
    if age < 17:
        return -1
    return 0
@labeling_function()
def email_domain(x):
    """Vote negative for Gmail addresses.

    Returns 0 when the substring "gmail" occurs in ``x.email_domain`` (the
    author treats Gmail as a strong signal that the writer is not a parent),
    and -1 (unknown) otherwise.
    """
    if "gmail" in x.email_domain:
        return 0
    return -1
@labeling_function()
def is_proxy(x):
    """Label by the proxy-account flag; this function never returns -1.

    Returns 1 when ``x.proxy_account_yn`` equals "Y" and 0 for every other
    value.
    """
    if x.proxy_account_yn == "Y":
        return 1
    return 0
@labeling_function()
def email_domain2(x):
    """Vote positive for Oracle addresses.

    Oracle is not an email domain easily accessible to teens, so an address on
    it suggests an adult is writing.

    Returns 1 when the substring "oracle" occurs in ``x.email_domain`` and
    -1 (unknown) otherwise.
    """
    # The original condition was inverted (``"oracle" not in ...``): it voted 1
    # for every non-Oracle domain and abstained on Oracle itself, the opposite
    # of the documented intent.
    if "oracle" in x.email_domain:
        return 1
    return -1
# Collect the labeling functions and apply them to the train / validation frames.
# NOTE(review): reading_fog is not defined in this snippet; it presumably lives
# elsewhere and must be defined before this list is built.
lfs = [
    child_in_msg,
    child_is_older,
    email_domain,
    email_domain2,
    reading_fog,
    is_proxy,
]
applier = PandasLFApplier(lfs)
# One column per entry in ``lfs``, in list order (the coverage check relies on this).
L_train = applier.apply(X_train)
L_dev = applier.apply(X_val)
# Check coverage for different labeling functions.
# Coverage here is the fraction of training rows where the function returned
# something other than -1.
coverage = (L_train != -1).mean(axis=0)
# Iterate over the list instead of unpacking a fixed number of columns, so the
# report keeps working when labeling functions are added or removed, and label
# each line with the function's own name rather than a placeholder.
for lf, frac in zip(lfs, coverage):
    print(f"{lf.name} coverage: {frac * 100:.1f}%")
# Full analysis (including coverage) of how well labeling functions are performing
from snorkel.labeling import LFAnalysis
LFAnalysis(L_train, lfs).lf_summary()
#####################
# Labeling functions looking at text
#####################
from snorkel.labeling import LabelingFunction
def keyword_lookup(x, keywords, label):
    """Return ``label`` if any keyword occurs as a whole word or phrase in the message.

    Args:
        x: Data point exposing a ``msg_txt`` attribute.
        keywords: Iterable of words or phrases, matched case-insensitively.
        label: Value to return when a keyword matches.

    Returns:
        ``label`` on a match; -1 (unknown) when nothing matches, when
        ``keywords`` is empty, or when ``msg_txt`` is not a string.
    """
    text = x.msg_txt
    # Missing text (None / NaN) has no .lower(); abstain instead of raising.
    if not isinstance(text, str):
        return -1
    text = text.lower()
    for word in keywords:
        # Plain substring tests made "he" match "the" and "son" match "person".
        # Require a non-word character (or the string edge) on both sides.
        pattern = rf"(?<!\w){re.escape(word.lower())}(?!\w)"
        if re.search(pattern, text):
            return label
    return -1
def make_keyword_lf(keywords, label=1, name=None):
    """Build a LabelingFunction that runs ``keyword_lookup`` with fixed keywords.

    Args:
        keywords: Non-empty sequence of words or phrases to look for.
        label: Label the function returns on a match (default 1).
        name: Optional explicit name. Defaults to ``keyword_<first keyword>``.
            Pass a name when two keyword lists share their first entry,
            otherwise both functions get the same generated name.

    Returns:
        A ``LabelingFunction`` wrapping ``keyword_lookup``.
    """
    if name is None:
        # Same default as before, so existing callers keep their names.
        name = f"keyword_{keywords[0]}"
    return LabelingFunction(
        name=name,
        f=keyword_lookup,
        resources=dict(keywords=keywords, label=label),
    )
#######################################################################
# Positive or unknown keywords
# In my case I was interested in giveaways that a parent was communicating
# with a clinician versus the teen (who should have been communicating)
#######################################################################
# Was keywords=["son"], which did not match the variable name (compare
# keyword_my_daughter) and produced the same generated name, "keyword_son",
# as keyword_son below.
keyword_my_son = make_keyword_lf(keywords=["my son"])
keyword_my_daughter = make_keyword_lf(keywords=["my daughter"])
keyword_my_child = make_keyword_lf(keywords=["my child", "your child"])
keyword_son = make_keyword_lf(keywords=["son", "he", "him"])
keyword_referring = make_keyword_lf(keywords=["for us", "our"])
keyword_daughter = make_keyword_lf(keywords=["daughter", "she", "her"])
keyword_child = make_keyword_lf(keywords=["child", "parent", "child's"])
#######################################################################
# Negative or unknown keywords
#######################################################################
keyword_my_parent = make_keyword_lf(
    keywords=["father", "mother", "mom", "dad", "my mom", "my dad"], label=0
)
# Final set of labeling functions: structured-field checks plus keyword checks.
# NOTE(review): reading_fog is not defined in this snippet; it presumably lives
# elsewhere and must be defined before this list is built.
lfs = [
    child_in_msg,
    # keyword_my_son,  (left disabled, as in the original)
    keyword_my_daughter,
    keyword_my_child,
    keyword_son,
    keyword_daughter,
    keyword_child,
    child_is_older,
    reading_fog,
    # keyword_my_parent and email_domain vote 0 (negative) on a match
    keyword_my_parent,
    email_domain,
    email_domain2,
    keyword_referring,
    is_proxy,
]
applier = PandasLFApplier(lfs)
# One label matrix per data split; columns follow the order of ``lfs``.
L_train = applier.apply(X_train)
L_dev = applier.apply(X_val)
L_valid = applier.apply(X_test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment