-
-
Save cfinch/09685dc93954a1d9dfff0b9abc0a0129 to your computer and use it in GitHub Desktop.
Google Apps Script for performing OCR on all images of a given MIME type found in the specified Drive folder. The extracted text is written to a Google Sheet, where each row maps an image's filename to its text.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Runs OCR on every image of the configured MIME type inside a named Drive
 * folder and records the recognised text in a spreadsheet.
 *
 * For each image, an OCR'd copy is created through the advanced Drive service
 * (`Drive.Files.insert` with `ocr: true`), directly inside the same folder.
 * The copy is then opened as a document and one row `[name, text]` is
 * appended to the target sheet. The sheet is cleared first and given a
 * `["Filename", "Text"]` header row.
 *
 * NOTE(review): `Drive.Files.insert` is the v2-style method name, so this
 * presumably needs the advanced Drive service enabled at v2 — confirm in the
 * project's Services settings.
 *
 * @throws {Error} If the configured folder or sheet tab cannot be found.
 */
function extractTextOnOpen() {
  //ADD YOUR VALUES BELOW
  var folderName = "[YOUR FOLDER NAME]";
  var sheetUrl = "[YOUR SHEET URL] (should start with https://docs.google.com/spreadsheets/d/)";
  var imageType = "image/png";
  var sheetName = "Sheet1";

  // Define folder. Fail with a readable message instead of the iterator's
  // generic error when no folder has that name.
  var folders = DriveApp.getFoldersByName(folderName);
  if (!folders.hasNext()) {
    throw new Error("Drive folder not found: " + folderName);
  }
  var folder = folders.next();
  var folderId = folder.getId();

  // Set up spreadsheet: wipe previous results and write the header row.
  var ss = SpreadsheetApp.openByUrl(sheetUrl);
  var sheet = ss.getSheetByName(sheetName);
  if (!sheet) {
    throw new Error("Sheet tab not found: " + sheetName);
  }
  Logger.log(sheet.getName());
  sheet.clear();
  sheet.appendRow(["Filename", "Text"]);

  // Find all images in folder
  var images = folder.getFilesByType(imageType);
  while (images.hasNext()) {
    var image = images.next();

    // Convert each image to a Google Doc with OCR. Setting `parents` here
    // creates the Doc inside the project folder right away, so it never has
    // to be moved out of the Drive root afterwards.
    var resource = {
      title: stripExtension_(image.getName()),
      mimeType: imageType,
      parents: [{ id: folderId }]
    };
    var ocrFile = Drive.Files.insert(resource, image, { ocr: true });

    // Open the Doc by the id returned from the insert call; looking it up by
    // name could pick a different file that happens to share the title.
    var doc = DocumentApp.openById(ocrFile.id);

    // Add item data to spreadsheet
    sheet.appendRow([doc.getName(), doc.getBody().getText()]);
  }
}

/**
 * Returns `fileName` without its final extension.
 *
 * Only the last dot-separated segment is removed, so "a.b.png" yields "a.b".
 * Names without a dot, or whose only dot is the first character, are
 * returned unchanged.
 *
 * @param {string} fileName - File name, with or without an extension.
 * @returns {string} The name with the trailing extension removed.
 */
function stripExtension_(fileName) {
  var lastDot = fileName.lastIndexOf(".");
  return lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment