Skip to content

Instantly share code, notes, and snippets.

@islem-esi
Created April 9, 2021 21:19
Show Gist options
  • Save islem-esi/6e7b679e0eb443bf35c96425fd1aeaaf to your computer and use it in GitHub Desktop.
Save islem-esi/6e7b679e0eb443bf35c96425fd1aeaaf to your computer and use it in GitHub Desktop.
Example helpers for text summarization: extractive models (sumy LSA, Luhn, LexRank, KL) and abstractive models (T5, BART).
import gc

import sumy
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
#LSA algorithm
from sumy.summarizers.lsa import LsaSummarizer
def lsa_summary(text, no_sentences, lang):
    """Extractive summary of *text* via sumy's LSA summarizer.

    text: plain-text document to summarize
    no_sentences: number of sentences to keep in the summary
    lang: language of the text (passed to sumy's Tokenizer)
    Returns the selected sentences as a list of strings.
    """
    plain_parser = PlaintextParser.from_string(text, Tokenizer(lang))
    summarizer = LsaSummarizer()
    picked = summarizer(plain_parser.document, no_sentences)
    gc.collect()
    return list(map(str, picked))
#Luhn
from sumy.summarizers.luhn import LuhnSummarizer
def luhn_summary(text, no_sentences, lang):
    """Extractive summary of *text* via sumy's Luhn (term-frequency) summarizer.

    text: plain-text document to summarize
    no_sentences: number of sentences to keep in the summary
    lang: language of the text (passed to sumy's Tokenizer)
    Returns the selected sentences as a list of strings.
    """
    # Use the from_string factory like the sibling summarizers in this file;
    # the original called the PlaintextParser constructor directly, bypassing
    # sumy's documented entry point.
    parser = PlaintextParser.from_string(text, Tokenizer(lang))
    luhn_sum = LuhnSummarizer()
    summary = luhn_sum(parser.document, no_sentences)
    gc.collect()
    return [str(sentence) for sentence in summary]
#LexRank
from sumy.summarizers.lex_rank import LexRankSummarizer
def lex_summary(text, no_sentences, lang):
    """Extractive summary of *text* via sumy's LexRank (graph-based) summarizer.

    text: plain-text document to summarize
    no_sentences: number of sentences to keep in the summary
    lang: language of the text (passed to sumy's Tokenizer)
    Returns the selected sentences as a list of strings.
    """
    doc = PlaintextParser.from_string(text, Tokenizer(lang)).document
    summarizer = LexRankSummarizer()
    chosen = summarizer(doc, no_sentences)
    gc.collect()
    result = []
    for sentence in chosen:
        result.append(str(sentence))
    return result
#KL
from sumy.summarizers.kl import KLSummarizer
def kl_summary(text, no_sentences, lang):
    """Extractive summary of *text* via sumy's KL-divergence summarizer.

    text: plain-text document to summarize
    no_sentences: number of sentences to keep in the summary
    lang: language of the text (passed to sumy's Tokenizer)
    Returns the selected sentences as a list of strings.
    """
    document = PlaintextParser.from_string(text, Tokenizer(lang)).document
    selection = KLSummarizer()(document, sentences_count=no_sentences)
    gc.collect()
    return [str(s) for s in selection]
#Transformers T5
from transformers import T5Tokenizer, T5Config, T5ForConditionalGeneration
def t5_summary(text, model):
    """Abstractive summary of *text* with a pretrained T5 checkpoint.

    text: document to summarize (input is truncated to 512 tokens)
    model: checkpoint name - t5-small, t5-base, t5-large, t5-3b, t5-11b
    Returns the decoded summary as a string.
    """
    my_model = T5ForConditionalGeneration.from_pretrained(model)
    tokenizer = T5Tokenizer.from_pretrained(model)
    # T5's documented summarization task prefix is "summarize: " (the original
    # omitted the space after the colon, producing an off-spec prompt).
    input_ids = tokenizer.encode(
        "summarize: " + text, return_tensors='pt', max_length=512, truncation=True
    )
    summary_ids = my_model.generate(input_ids)
    # skip_special_tokens keeps <pad>/</s> markers out of the returned text,
    # consistent with bart_summary's decoding.
    t5_sum = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    gc.collect()
    return str(t5_sum)
#BART
from transformers import BartForConditionalGeneration, BartTokenizer, BartConfig
def bart_summary(text, model):
    """Abstractive summary of *text* with a pretrained facebook/BART checkpoint.

    text: document to summarize
    model: checkpoint suffix - bart-base, bart-large, bart-large-cnn
    Returns the decoded summary as a string.
    """
    tokenizer = BartTokenizer.from_pretrained('facebook/' + str(model))
    # Renamed local: the original rebound the `model` parameter to the network,
    # shadowing the checkpoint name.
    bart_model = BartForConditionalGeneration.from_pretrained('facebook/' + str(model))
    # batch_encode_plus expects a batch (list of strings); the original passed
    # a bare str, which the tokenizer mis-handles as a pretokenized sequence.
    inputs = tokenizer.batch_encode_plus(
        [text], return_tensors='pt', padding=True, truncation=True
    )
    summary_ids = bart_model.generate(inputs['input_ids'], early_stopping=True)
    decoded = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    gc.collect()
    return str(decoded)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment