Last active
June 12, 2020 06:31
-
-
Save p410n3/b85c98169a8887fd306247172b7caeef to your computer and use it in GitHub Desktop.
MLZ Telegram Deno Bot
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Lazier Debugging
const log = console.log;

// Constants
const url = 'https://www.muensterlandzeitung.de/';
const botUrl = 'https://api.telegram.org/<bot-token>/sendMessage?chat_id=@muensterlandzeitung&text=';
const historyFile = './history.json';

// Links that were already sent to the channel, loaded from historyFile.
// Loading is best-effort: on any failure (e.g. the file does not exist yet on
// the first run) we start with an empty history, but we say so instead of
// silently swallowing the error.
let history = [];
try {
    const stored = JSON.parse(readHistory());
    // Anything but an array cannot be iterated/pushed to below, so ignore it.
    if (Array.isArray(stored)) history = stored;
} catch (e) {
    console.warn(`Could not load ${historyFile}, starting with an empty history:`, e);
}

// RegEx's are defined here
const regExArticleHeadersFromHomePage = /<h2 class="article-heading.*?">(?<content>.*?)<\/h2>/gms;
const regExAnchors = /<a title="(?<title>.*?)" href="(?<href>.*?)">/gms;

// Think of this like a main() function:
// fetch the home page, extract the article links, send every link whose title
// is not in the history yet, then persist the updated history.
const homePage = fetch(url)
    .then(res => {
        if (!res.ok) throw new Error(`Fetching ${url} failed with HTTP status ${res.status}`);
        return res.text();
    })
    .then(data => {
        const articles = getGroupsFromRegex(data, regExArticleHeadersFromHomePage);

        // As getGroupsFromRegex returns an array, we only take the first anchor of each heading
        const rawLinks = articles.map(article => getGroupsFromRegex(article.content, regExAnchors)[0]);

        // We need the clean link array
        const links = removeDuplicateLinks(rawLinks);

        // Titles already sent; a Set avoids rescanning the whole history per link
        const knownTitles = new Set(history.map(el => el.title));

        for (const link of links) {
            if (knownTitles.has(link.title)) continue;

            sendTelegramRequest(link);
            history.push(link); // Put into history so it does not get sent again
            knownTitles.add(link.title);
        }

        // Now after all that, write the history again.
        // Returned so a failed write ends up in the catch handler below.
        return writeHistory(history);
    })
    .catch(err => {
        console.error('Bot run failed:', err);
    });
// Helper functions down here
/**
 * Collects the named capture groups of every match of `regex` in `data`.
 *
 * The search runs on a private global copy of `regex`, so the caller's regex
 * (and its `lastIndex`) is neither consulted nor modified, and a regex passed
 * without the `g` flag is still scanned for all matches instead of looping
 * forever on the first one.
 *
 * @param {string} data - Text to search; coerced to a string.
 * @param {RegExp} regex - Pattern to apply, ideally with named groups.
 * @returns {Array<Object|undefined>} One `groups` object per match, in match
 *   order; entries are `undefined` when the pattern has no named groups.
 */
function getGroupsFromRegex(data, regex) {
    // matchAll requires the global flag, so add it when it is missing.
    const flags = regex.flags.includes('g') ? regex.flags : `${regex.flags}g`;
    const scanner = new RegExp(regex, flags);

    return Array.from(String(data).matchAll(scanner), match => match.groups);
}
/**
 * Removes empty entries and duplicate links from a list of links.
 *
 * Two links count as duplicates when they share the same `href`; the first
 * occurrence is kept and the original order is preserved. This replaces the
 * former "throw every second link away" shortcut, which only worked when
 * duplicates came in strict adjacent pairs and silently dropped real links
 * otherwise.
 *
 * @param {Array<{title: string, href: string}|undefined>} links - Links as
 *   extracted from the page; may contain empty (falsy) entries.
 * @returns {Array<{title: string, href: string}>} Unique, non-empty links.
 */
function removeDuplicateLinks(links) {
    const seenHrefs = new Set();
    const uniqueLinks = [];

    for (const link of links) {
        // Headings without a matching anchor show up as empty elements
        if (!link) continue;
        if (seenHrefs.has(link.href)) continue;

        seenHrefs.add(link.href);
        uniqueLinks.push(link);
    }

    return uniqueLinks;
}
/**
 * Reads the raw history JSON text from `historyFile`.
 *
 * The read is synchronous and is not guarded here: any error thrown by
 * `Deno.readTextFileSync` propagates to the caller.
 *
 * @returns {string} The file content as text (not yet parsed).
 */
function readHistory() {
    return Deno.readTextFileSync(historyFile);
}
/**
 * Writes the history into `historyFile` as JSON, replacing the file content.
 *
 * Uses the asynchronous `Deno.writeTextFile`, so the `await` actually waits
 * for the write instead of wrapping a blocking synchronous call.
 *
 * @param {*} data - JSON-serializable value to store (the history array).
 * @returns {Promise<void>} Resolves once the file is written; rejects if
 *   serialization or the write fails.
 */
async function writeHistory(data) {
    await Deno.writeTextFile(historyFile, JSON.stringify(data));
}
/**
 * Posts one article link to the Telegram channel configured in `botUrl`.
 *
 * The message text is "<title> <article url>". The article URL is resolved
 * against the site `url`, so relative, root-relative and absolute hrefs all
 * work. The whole text is escaped with `encodeURIComponent`, because
 * `encodeURI` leaves characters such as `&`, `#` and `+` untouched and a
 * title containing them would corrupt the `text` query parameter.
 *
 * Failures are logged, never thrown, so one bad link cannot abort the caller.
 *
 * @param {{title: string, href: string}} link - Link to announce.
 * @returns {Promise<void>} Settles when the request has finished or failed.
 */
async function sendTelegramRequest(link) {
    try {
        const articleUrl = new URL(link.href, url).href;
        const text = encodeURIComponent(`${link.title} ${articleUrl}`);

        const res = await fetch(botUrl + text);
        if (!res.ok) {
            console.error(`Telegram rejected "${link.title}" with HTTP status ${res.status}`);
        }
    } catch (err) {
        console.error(`Sending "${link.title}" to Telegram failed:`, err);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment