Last active
November 21, 2023 09:18
-
-
Save paulgrammer/741850da474e32d4ddd02656b17a1ae3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const cheerio = require("cheerio"); | |
const axios = require("axios"); | |
const fs = require("fs"); | |
// Root directory under which every cached list and downloaded chapter is stored.
const baseDir = "./bible";

// Base URL of the jw.org book index for each language code used by this
// script. Frozen because it is shared, read-only configuration.
const BASE_URL = Object.freeze({
  en: "https://www.jw.org/en/library/bible/nwt/books",
  lg: "https://www.jw.org/lg/layibulale/bayibuli/nwt/ebitabo",
});

// `recursive: true` makes this a no-op when the directory already exists, so
// no separate existsSync() check (and its check-then-act window) is needed.
fs.mkdirSync(baseDir, { recursive: true });
/**
 * Returns the list of Bible book names for each supported language.
 *
 * The lists are cached as `${baseDir}/books.<lang>.txt`, one name per line.
 * When both cache files hold at least one name they are returned directly;
 * otherwise both index pages are fetched and the `<option>` entries of the
 * `select[id=Book]` element are scraped.
 *
 * @returns {Promise<{en: string[], lg: string[]}>} Book names keyed by
 *   language code. A list is empty when its page could not be fetched.
 */
async function getBooks() {
  const enPath = `${baseDir}/books.en.txt`;
  const lgPath = `${baseDir}/books.lg.txt`;

  // Reads a cache file and drops blank lines, so an empty file yields [].
  const readCachedLines = (path) =>
    fs
      .readFileSync(path, "utf8")
      .split("\n")
      .filter((line) => line.length > 0);

  if (fs.existsSync(enPath) && fs.existsSync(lgPath)) {
    const cachedEn = readCachedLines(enPath);
    const cachedLg = readCachedLines(lgPath);
    // An empty cache file can only come from an earlier failed download;
    // treat it as missing and fall through to a fresh fetch.
    if (cachedEn.length > 0 && cachedLg.length > 0) {
      return { en: cachedEn, lg: cachedLg };
    }
  }

  // Downloads one index page and extracts the book names from it.
  const fetchBooks = async (url) => {
    try {
      const response = await axios.get(url);
      console.log(response.request.res.responseUrl);
      const $ = cheerio.load(response.data);
      const names = [];
      $("select[id=Book]")
        .find("option")
        .each((_, node) => {
          names.push($(node).html());
        });
      return names;
    } catch (error) {
      // Best effort: report the failure and let the caller see an empty list.
      console.error(`Failed to fetch book list from ${url}: ${error.message}`);
      return [];
    }
  };

  const [en, lg] = await Promise.all([
    fetchBooks(BASE_URL.en),
    fetchBooks(BASE_URL.lg),
  ]);

  // Cache only complete results; writing an empty list would make every later
  // run read the bad cache instead of retrying the download.
  if (en.length > 0 && lg.length > 0) {
    fs.writeFileSync(enPath, en.join("\n"));
    fs.writeFileSync(lgPath, lg.join("\n"));
  }

  return { en, lg };
}
/**
 * Returns the chapter labels of one book.
 *
 * Results are cached in `${baseDir}/chapters/<book>`, one label per line.
 * On a cache miss the book page at `${url}/${book}` (whitespace replaced by
 * "-") is fetched and the link texts inside `.chapters` are scraped.
 *
 * @param {string} book - Book name; also used as the cache file name.
 * @param {string} [url=BASE_URL.en] - Base URL of the book index.
 * @returns {Promise<string[]>} Chapter labels, or an empty array when the
 *   page could not be fetched.
 */
async function getChapters(book, url = BASE_URL.en) {
  const chaptersDir = `${baseDir}/chapters`;
  const cachePath = `${chaptersDir}/${book}`;

  fs.mkdirSync(chaptersDir, { recursive: true });

  if (fs.existsSync(cachePath)) {
    const cached = fs
      .readFileSync(cachePath, "utf8")
      .split("\n")
      .filter((line) => line.length > 0);
    // An empty cache file is the residue of a failed download; refetch.
    if (cached.length > 0) {
      return cached;
    }
  }

  const pageUrl = `${url}/${book}`.replace(/\s/g, "-");
  const chapters = [];
  try {
    const response = await axios.get(pageUrl);
    console.log(response.request.res.responseUrl);
    const $ = cheerio.load(response.data);
    $("div")
      .find(".chapters")
      .find("a")
      .each((_, node) => {
        chapters.push($(node).html());
      });
  } catch (error) {
    // Best effort: report the failure and return an empty list.
    console.error(`Failed to fetch chapters from ${pageUrl}: ${error.message}`);
    return [];
  }

  // Never cache an empty list, otherwise the book would be skipped on every
  // later run instead of being retried.
  if (chapters.length > 0) {
    fs.writeFileSync(cachePath, chapters.join("\n"));
  }
  return chapters;
}
/**
 * Downloads one chapter in every supported language and stores each as
 * `${baseDir}/scriptures/<English book name>/<chapter>.<lang>.txt`.
 *
 * A language whose file already exists with content is not downloaded again;
 * its cached lines are returned instead.
 *
 * @param {number} bookIndex - Index of the book in the lists from getBooks().
 * @param {number|string} chapter - Chapter identifier appended to the URL.
 * @returns {Promise<Array<string[]>>} `[enLines, lgLines]`; a language's list
 *   is empty when its page could not be downloaded.
 */
async function getScriptures(bookIndex, chapter) {
  const books = await getBooks();
  const scriptureDir = `${baseDir}/scriptures/${books.en[bookIndex]}`;

  // Fetches, cleans and stores the chapter text for a single language.
  const fetchChapter = async (language) => {
    const filePath = `${scriptureDir}/${chapter}.${language}.txt`;

    if (fs.existsSync(filePath)) {
      const cached = fs
        .readFileSync(filePath, "utf8")
        .split("\n")
        .filter((line) => line.length > 0);
      // An empty file means an earlier run stored nothing useful; refetch.
      if (cached.length > 0) {
        return cached;
      }
    }

    const pageUrl =
      `${BASE_URL[language]}/${books[language][bookIndex]}/${chapter}`.replace(
        /\s/g,
        "-"
      );

    try {
      const response = await axios.get(pageUrl);
      console.log(response.request.res.responseUrl);
      const $ = cheerio.load(response.data);
      const scriptures = $("div[id=bibleText]")
        .text()
        .trim()
        // Strip the "+" and "*" marker characters from the text.
        .replace(/[+*]/g, "")
        // Normalise typographic quotes to plain ASCII quotes.
        .replace(/[“”]/g, '"')
        .replace(/’/g, "'")
        .split("\n")
        .filter((line) => line.length > 0);

      if (scriptures.length > 0) {
        // `recursive` also creates the parent "scriptures" directory; a plain
        // mkdirSync here failed with ENOENT and nothing was ever saved.
        fs.mkdirSync(scriptureDir, { recursive: true });
        fs.writeFileSync(filePath, scriptures.join("\n"));
      }
      return scriptures;
    } catch (error) {
      // Best effort: report the failure and continue with the next chapter.
      console.error(`Failed to download ${pageUrl}: ${error.message}`);
      return [];
    }
  };

  return Promise.all([fetchChapter("en"), fetchChapter("lg")]);
}
/**
 * Entry point: walks every book and every chapter in order and downloads
 * them one chapter at a time, then logs "done!".
 *
 * @returns {Promise<void>} Settles once all books have been processed.
 */
async function main() {
  const { en: bookNames } = await getBooks();

  // Downloads all chapters of one book sequentially.
  const fetchScriptures = async (book, bookIndex) => {
    const chapters = await getChapters(book);
    // Chapters are requested by their 1-based position; the scraped labels
    // are only used to know how many chapters there are.
    let chapterNumber = 1;
    for (const chapter of chapters) {
      // An empty entry ends the list, exactly like running out of chapters.
      if (!chapter) {
        break;
      }
      await getScriptures(bookIndex, chapterNumber);
      chapterNumber++;
    }
  };

  let bookIndex = 0;
  for (const book of bookNames) {
    // An empty entry ends the list, exactly like running out of books.
    if (!book) {
      break;
    }
    await fetchScriptures(book, bookIndex);
    bookIndex++;
  }

  console.log("done!");
}

// Surface unexpected failures instead of leaving an unhandled rejection.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment