Last active
August 30, 2019 18:45
-
-
Save jchristopher/fc1d461bd82b40e31290 to your computer and use it in GitHub Desktop.
Tell SearchWP to extract PDF contents of a file uploaded to an ACF file upload field and store that content with the post itself as custom metadata, not the Media entry
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Extract PDF content from a file uploaded via an ACF File field (where
 * the actual data stored is only the Media ID) and store the PDF
 * content as SearchWP 'extra' metadata so it can be included in
 * searches and attributed to the post itself, not the Media entry.
 */
class My_SearchWP_Customizations {

	// ACF field name for the PDF upload field.
	private $my_acf_field_name = 'acf_pdf_field_name';

	// Custom field key used by SearchWP to store PDF content with the post.
	private $my_searchwp_acf_pdf_custom_field_key = 'searchwp_pdf_content';

	/**
	 * Initializer; hook into SearchWP where necessary.
	 */
	public function init() {
		// Hook into SearchWP's indexer.
		add_filter( 'searchwp_extra_metadata', array( $this, 'maybe_extract_pdf_contents_from_acf_field' ), 10, 2 );
		add_filter( 'searchwp_custom_field_keys', array( $this, 'add_custom_pdf_content_metakey' ), 10, 1 );
	}

	/**
	 * Callback from the SearchWP indexer to extract content from a PDF uploaded to the specified
	 * ACF upload field.
	 *
	 * The metadata is returned untouched when SearchWP's PDF tooling or ACF is unavailable,
	 * when the field is empty, or when the referenced attachment is missing or is not a PDF.
	 *
	 * @param array  $post_metadata The post metadata to index
	 * @param object $post_to_index The post object currently being indexed
	 *
	 * @return array The post metadata to index
	 */
	public function maybe_extract_pdf_contents_from_acf_field( $post_metadata, $post_to_index ) {
		// At the very least we need either the SearchWP indexer, or SearchWP Xpdf Integration.
		if ( ! class_exists( 'SearchWPIndexer' ) && ! class_exists( 'SearchWPXpdf' ) ) {
			return $post_metadata;
		}

		// ACF must be active and we need to know which post is being indexed.
		if ( ! function_exists( 'get_field' ) || ! is_object( $post_to_index ) || empty( $post_to_index->ID ) ) {
			return $post_metadata;
		}

		// Read the field from the post being indexed (the indexer does not run inside that
		// post's loop, so the global post cannot be relied upon). Passing false for
		// $format_value asks ACF for the raw stored value, i.e. the attachment ID, no matter
		// which return format (ID / URL / array) the field is configured with.
		$pdf_object_id = get_field( $this->my_acf_field_name, $post_to_index->ID, false );

		// If there's no usable ACF value, there's no sense in continuing.
		if ( empty( $pdf_object_id ) || ! is_numeric( $pdf_object_id ) ) {
			return $post_metadata;
		}

		// Grab the Media post object that is the PDF.
		$pdf_object = get_post( absint( $pdf_object_id ) );

		// Bail out when the attachment no longer exists or is anything other than a PDF.
		if ( ! is_object( $pdf_object ) || 'application/pdf' !== $pdf_object->post_mime_type ) {
			return $post_metadata;
		}

		if ( class_exists( 'SearchWPXpdf' ) ) {
			// Xpdf Integration is enabled, so use Xpdf to parse the PDF.
			$xpdf = new SearchWPXpdf();
			$xpdf->init();
			$pdf_content = $xpdf->extract_pdf_content( '', get_attached_file( $pdf_object->ID ) );
		} else {
			// Fall back to built-in PDF parsing and grab the PDF content.
			$indexer = new SearchWPIndexer();
			$indexer->set_post( $pdf_object );
			$pdf_content = $indexer->extract_pdf_text( $pdf_object->ID );
		}

		// Add it to the pseudo-metadata array.
		$post_metadata[ $this->my_searchwp_acf_pdf_custom_field_key ] = $pdf_content;

		return $post_metadata;
	}

	/**
	 * Make sure our 'extra' metadata key is visible in the Custom Fields dropdown on the settings page.
	 *
	 * @param array $keys The Custom Field keys to show on the settings screen
	 *
	 * @return array The Custom Field keys to show on the settings screen
	 */
	public function add_custom_pdf_content_metakey( $keys ) {
		$keys[] = $this->my_searchwp_acf_pdf_custom_field_key;

		return $keys;
	}
}
// Liftoff: instantiate the customizations and register their SearchWP filter callbacks.
$my_searchwp_customizations = new My_SearchWP_Customizations();
$my_searchwp_customizations->init();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Unfortunately not working for me. Added it in functions.php and replaced the File field name with the field name that I am using for my File field. Also changed the output of the ACF file field to the ID instead of the default option that is Object. Am I doing anything wrong?