Skip to content

Instantly share code, notes, and snippets.

@cfinch
Forked from kltng/batch_ocr.gs
Last active July 27, 2023 17:10
Show Gist options
  • Save cfinch/09685dc93954a1d9dfff0b9abc0a0129 to your computer and use it in GitHub Desktop.
Save cfinch/09685dc93954a1d9dfff0b9abc0a0129 to your computer and use it in GitHub Desktop.
Google Apps Script for performing OCR on all images of a certain type found in the specified Drive folder. Extracts the text to a Google Sheet, where it is mapped to each image's filename.
/**
 * Runs OCR on every image of one MIME type in a Drive folder and writes the
 * recognized text to a spreadsheet, one row per image.
 *
 * For each matching image an OCR'd Google Doc is created inside the same
 * folder (via the Drive advanced service, API v2), its text is read back, and
 * a [document name, text] row is appended to the "Sheet1" tab. The tab is
 * cleared and given a "Filename" / "Text" header row first.
 *
 * Requires the Drive advanced service to be enabled for the script project.
 *
 * @throws {Error} If the configured folder or the "Sheet1" tab cannot be found.
 */
function extractTextOnOpen() {
  //ADD YOUR VALUES BELOW
  var folderName = "[YOUR FOLDER NAME]";
  var sheetUrl = "[YOUR SHEET URL] (should start with https://docs.google.com/spreadsheets/d/)";
  var imageType = "image/png";

  //Define folder
  var folders = DriveApp.getFoldersByName(folderName);
  if (!folders.hasNext()) {
    // Fail with a readable message instead of an opaque iterator error.
    throw new Error('No Drive folder named "' + folderName + '" was found.');
  }
  var folder = folders.next();
  var folderId = folder.getId();

  //Set up spreadsheet
  var ss = SpreadsheetApp.openByUrl(sheetUrl);
  var sheet = ss.getSheetByName("Sheet1");
  if (!sheet) {
    throw new Error('The spreadsheet has no tab named "Sheet1".');
  }
  Logger.log(sheet.getName());
  sheet.clear();
  sheet.appendRow(["Filename", "Text"]);

  //Find all images in folder
  var images = folder.getFilesByType(imageType);
  while (images.hasNext()) {
    //Convert each image to a Google Doc with OCR
    var image = images.next();
    var imageName = image.getName();

    // Strip only the final extension so "scan.page.1.png" stays "scan.page.1".
    var lastDot = imageName.lastIndexOf(".");
    var docName = lastDot > 0 ? imageName.substring(0, lastDot) : imageName;

    var resource = {
      title: docName,
      mimeType: imageType,
      // Create the Doc directly in the project folder; no move from root needed.
      parents: [{ id: folderId }]
    };
    var ocrFile = Drive.Files.insert(resource, image.getBlob(), { ocr: true });

    // Open the Doc by the id the API returned. Looking it up by name could
    // return an unrelated file that happens to share the same title.
    var doc = DocumentApp.openById(ocrFile.id);
    var name = doc.getName();
    var body = doc.getBody().getText();

    //Add item data to spreadsheet
    sheet.appendRow([name, body]);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment