Last active
July 11, 2023 04:59
-
-
Save Adekoreday/88bbefd689d4026f5af31d886b9f17b2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Run Paginated Jobs | |
/** | |
* Cursor-based pagination is used here for efficiency: pages are fetched from MongoDB one batch at a time and exposed through an async iterator.
*/ | |
paginate: async function (schema, batchSize = 10) { | |
let result, cursor | |
const next = async () => { | |
if (cursor) { | |
result = await schema.find({'_id': {'$gt': cursor}}).limit(batchSize) | |
}else { | |
result = await schema.find().limit(batchSize) | |
} | |
if (result.length === 0) { | |
return { done: true, value: undefined } | |
} | |
cursor = result.slice(-1)[0]['_id'] | |
return { done: false, value: result } | |
} | |
return { | |
[Symbol.asyncIterator]() { | |
return { next } | |
} | |
} | |
}, | |
/** | |
* | |
* This should be deprecated after recs has been migrated.
*/ | |
runJob: async function ({pageSize, dryRun, maxPage}) { | |
try { | |
// batch size is important to ensure we are not querying too much to run out of memory | |
const batchSize = Number(pageSize) || 10 | |
const stopAfterPage = Number(maxPage) | |
// we paginate the entity here | |
const pages = await this.paginate(Student, batchSize) | |
let currentPage = 0; | |
const migrationResults = [] | |
// we iterate using async iterator allowing us to use for of | |
for await (const page of pages) { | |
console.info(`Processing page ${currentPage++}`); | |
for (const student of page) { | |
migrationResults.push( | |
// this is the actual action running on each entity found | |
await this.handleEachItems(student, dryRun) | |
) | |
} | |
// we only add stop after page if we want to stop inbetween | |
if (stopAfterPage && currentPage >= stopAfterPage) { | |
console.debug(`Stopping after page ${currentPage}.`); | |
break; | |
} | |
} | |
// This summarize the result and logs accordingly | |
const updatedStudents = migrationResults.filter(Boolean); | |
console.info(`${updatedStudents.length} students migrated`); | |
console.info(`${migrationResults.length - updatedStudents.length} students migration failed`) | |
}catch (e) { | |
console.error(e, 'some error occoured during recs migration') | |
} | |
}, | |
# SUMMARY | |
The approach used above relies on async iterators to fetch the items in pages. It gives a sort of observable approach to querying data and processing it. I'd cap the batch size at a maximum of 100 to avoid processing too many requests at a single time. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment