Skip to content

Instantly share code, notes, and snippets.

@GaryJones
Last active June 12, 2025 23:08
Show Gist options
  • Save GaryJones/eb04cc4f0dc592ff51ddb85b1bd2e399 to your computer and use it in GitHub Desktop.
Save GaryJones/eb04cc4f0dc592ff51ddb85b1bd2e399 to your computer and use it in GitHub Desktop.
Custom plugins for the Distributor WordPress plugin that automatically fetch and publish posts every 5 minutes, and set each created post to the link post format with the original URL as the post link.
<?php
/**
* Plugin Name: Distributor Auto Pull
* Description: Automatically pulls posts from external sites using the Distributor plugin.
* Version: 1.0.0
* Author: Automattic
* License: GPL v2 or later
* License URI: https://www.gnu.org/licenses/gpl-2.0.html
*/
namespace Automattic\DistributorAutoPull;
// Bail out when this file is requested outside of WordPress.
defined( 'ABSPATH' ) || exit;

// Make sure the recurring pull event is scheduled; checked on every `init`.
add_action( 'init', __NAMESPACE__ . '\\setup_cron' );

// Run the pull whenever the scheduled event fires.
add_action( 'distributor_auto_pull_cron', __NAMESPACE__ . '\\pull_posts' );

// Expose the `distributor-auto-pull` command when running under WP-CLI.
if ( defined( 'WP_CLI' ) && \WP_CLI ) {
    \WP_CLI::add_command( 'distributor-auto-pull', __NAMESPACE__ . '\\CLI' );
}

// Replace the metadata read from every image with an empty array so that
// EXIF processing warnings are avoided. Both filter arguments are ignored.
add_filter(
    'wp_read_image_metadata',
    static function ( $meta, $file ) {
        return array();
    },
    10,
    2
);

// Add the custom five-minute recurrence to the list of cron schedules.
add_filter( 'cron_schedules', __NAMESPACE__ . '\\distributor_auto_pull_cron_schedules' );
/**
 * Register the five-minute recurrence used by the auto-pull cron event.
 *
 * @param array $schedules Recurrences registered so far, keyed by name.
 * @return array The same recurrences plus `distributor_auto_pull_cron_interval`.
 */
function distributor_auto_pull_cron_schedules( $schedules ) {
    // Five minutes, expressed in seconds.
    $interval_in_seconds = 5 * 60;

    $every_five_minutes = array(
        'interval' => $interval_in_seconds,
        'display'  => __( 'Distributor Auto Pull - Every 5 Minutes', 'distributor-auto-pull' ),
    );

    $schedules['distributor_auto_pull_cron_interval'] = $every_five_minutes;

    return $schedules;
}
/**
 * Schedule the recurring auto-pull event unless one is already queued.
 *
 * The event uses the `distributor_auto_pull_cron_interval` recurrence and
 * starts from the current time.
 */
function setup_cron() {
    $hook = 'distributor_auto_pull_cron';

    // An event is already queued; scheduling again would duplicate it.
    if ( wp_next_scheduled( $hook ) ) {
        return;
    }

    wp_schedule_event( time(), 'distributor_auto_pull_cron_interval', $hook );
}
/**
 * Collect the external connections this site can pull from.
 *
 * Queries up to 100 `dt_ext_connection` posts, keeps those whose stored
 * `dt_external_connections` status has a non-empty `can_get` entry, and
 * instantiates each one through Distributor. Connections that fail to
 * instantiate (a WP_Error is returned) are left out.
 *
 * @return array Array of external connection objects; empty when Distributor
 *               is not loaded or no usable connection exists.
 */
function get_external_connections() {
    $connections = array();

    // This runs from a cron callback, so Distributor may have been deactivated
    // since the event was scheduled. Bail out rather than trigger a fatal
    // "class not found" error.
    if ( ! class_exists( 'Distributor\\ExternalConnection' ) ) {
        return $connections;
    }

    // Only IDs are needed here; the connection object is built further down.
    $external_connections = new \WP_Query(
        array(
            'post_type'      => 'dt_ext_connection',
            'fields'         => 'ids',
            'no_found_rows'  => true,
            'posts_per_page' => 100,
        )
    );

    if ( empty( $external_connections->posts ) ) {
        return $connections;
    }

    foreach ( $external_connections->posts as $external_connection_id ) {
        $external_connection_status = get_post_meta( $external_connection_id, 'dt_external_connections', true );

        // Skip connections that are not flagged as readable.
        if ( empty( $external_connection_status ) || empty( $external_connection_status['can_get'] ) ) {
            continue;
        }

        $connection = \Distributor\ExternalConnection::instantiate( $external_connection_id );

        if ( ! is_wp_error( $connection ) ) {
            $connections[] = $connection;
        }
    }

    return $connections;
}
/**
 * Pull new posts from every pull-enabled external connection.
 *
 * For each connection returned by get_external_connections(), and for each
 * post type Distributor reports as pullable, this fetches published remote
 * posts (using the connection's default pagination), skips anything already
 * present in the connection's sync log, and hands the rest to the
 * connection's pull() method. Items that come back without error are written
 * to the sync log via log_sync().
 *
 * @return array {
 *     Results of the pull operation.
 *
 *     @type int      $connections_checked Number of connections processed.
 *     @type int      $posts_pulled        Number of items pulled without error.
 *     @type string[] $errors              Human-readable error messages.
 *     @type string[] $debug               Human-readable progress messages.
 * }
 */
function pull_posts() {
    $results = array(
        'connections_checked' => 0,
        'posts_pulled'        => 0,
        'errors'              => array(),
        'debug'               => array(),
    );

    // Get all external connections
    $connections        = get_external_connections();
    $results['debug'][] = sprintf(
        'Found %d total connections',
        count( $connections )
    );

    foreach ( $connections as $connection ) {
        $results['debug'][] = sprintf(
            'Checking connection: %s (type: %s)',
            $connection->id,
            get_class( $connection )
        );
        $results['connections_checked']++;

        // Get available post types using Distributor's built-in functionality
        $available_post_types = \Distributor\Utils\available_pull_post_types( $connection, 'external' );
        $results['debug'][]   = sprintf(
            'Available post types for connection %s: %s',
            $connection->id,
            ! empty( $available_post_types ) ? implode( ', ', wp_list_pluck( $available_post_types, 'slug' ) ) : 'none'
        );

        if ( empty( $available_post_types ) ) {
            $results['errors'][] = sprintf(
                'No available post types for connection %s',
                $connection->id
            );
            continue;
        }

        // Get posts from each post type
        foreach ( $available_post_types as $post_type ) {
            $results['debug'][] = sprintf(
                'Fetching posts for connection %s, post type %s',
                $connection->id,
                $post_type['slug']
            );

            // Use Distributor's built-in remote_get with default pagination
            $remote_posts = $connection->remote_get(
                array(
                    'post_type'   => $post_type['slug'],
                    'post_status' => 'publish',
                )
            );

            if ( is_wp_error( $remote_posts ) ) {
                $results['errors'][] = sprintf(
                    'Error getting posts for connection %s, post type %s: %s',
                    $connection->id,
                    $post_type['slug'],
                    $remote_posts->get_error_message()
                );
                continue;
            }

            if ( empty( $remote_posts['items'] ) ) {
                $results['debug'][] = sprintf(
                    'No posts found for connection %s, post type %s',
                    $connection->id,
                    $post_type['slug']
                );
                continue;
            }

            $results['debug'][] = sprintf(
                'Found %d %s for connection %s, post type %s',
                count( $remote_posts['items'] ),
                _n( $post_type['slug'], $post_type['slug'] . 's', count( $remote_posts['items'] ) ),
                $connection->id,
                $post_type['slug']
            );

            // Get the sync log for this connection
            $sync_log           = $connection->get_sync_log();
            $results['debug'][] = sprintf(
                'Sync log for connection %s: %s',
                $connection->id,
                ! empty( $sync_log ) ? json_encode( $sync_log ) : 'empty'
            );

            // Prepare posts for pulling
            $items_to_pull = array();
            foreach ( $remote_posts['items'] as $remote_post ) {
                // Skip if post is already in sync log
                if ( isset( $sync_log[ $remote_post->ID ] ) ) {
                    $results['debug'][] = sprintf(
                        'Skipping %s %d - already in sync log',
                        _n( $post_type['slug'], $post_type['slug'] . 's', 1 ),
                        $remote_post->ID
                    );
                    continue;
                }

                // NOTE: an earlier revision also skipped the item when
                // get_post_status( $remote_post->ID ) was 'trash'. That looked up
                // a local post using the remote site's ID, so it tested an
                // unrelated post and could wrongly suppress a pull. Items that
                // were pulled before are already caught by the sync-log check.

                // Follow exact format from WordPressExternalConnection::pull() documentation
                $items_to_pull[] = array(
                    'remote_post_id' => $remote_post->ID,
                    'post_type'      => $post_type['slug'],
                );
            }

            if ( empty( $items_to_pull ) ) {
                continue;
            }

            $results['debug'][] = sprintf(
                'Attempting to pull %d %s for connection %s, post type %s',
                count( $items_to_pull ),
                _n( $post_type['slug'], $post_type['slug'] . 's', count( $items_to_pull ) ),
                $connection->id,
                $post_type['slug']
            );

            $pull_results = $connection->pull( $items_to_pull );

            if ( is_wp_error( $pull_results ) ) {
                $results['errors'][] = sprintf(
                    'Error pulling %s for connection %s, post type %s: %s',
                    _n( $post_type['slug'], $post_type['slug'] . 's', 1 ),
                    $connection->id,
                    $post_type['slug'],
                    $pull_results->get_error_message()
                );
                continue;
            }

            // Create mapping of remote post IDs to new post IDs for sync log.
            // Results are matched to the requested items by array key.
            $post_id_mappings = array();
            $successful_pulls = 0;
            foreach ( $pull_results as $key => $pulled_post ) {
                if ( is_wp_error( $pulled_post ) ) {
                    // Record per-item failures instead of dropping them silently.
                    $results['errors'][] = sprintf(
                        'Error pulling remote post %s for connection %s, post type %s: %s',
                        isset( $items_to_pull[ $key ]['remote_post_id'] ) ? $items_to_pull[ $key ]['remote_post_id'] : $key,
                        $connection->id,
                        $post_type['slug'],
                        $pulled_post->get_error_message()
                    );
                    continue;
                }

                $post_id_mappings[ $items_to_pull[ $key ]['remote_post_id'] ] = $pulled_post;
                $successful_pulls++;
            }

            // Log the sync to update the admin UI
            if ( ! empty( $post_id_mappings ) ) {
                $connection->log_sync( $post_id_mappings );
                $results['debug'][] = sprintf(
                    'Logged sync for %d %s',
                    count( $post_id_mappings ),
                    _n( $post_type['slug'], $post_type['slug'] . 's', count( $post_id_mappings ) )
                );
            }

            $results['posts_pulled'] += $successful_pulls;
            $results['debug'][]       = sprintf(
                'Successfully pulled %d %s for connection %s, post type %s',
                $successful_pulls,
                _n( $post_type['slug'], $post_type['slug'] . 's', $successful_pulls ),
                $connection->id,
                $post_type['slug']
            );
        }
    }

    return $results;
}
/**
 * WP-CLI commands for running Distributor Auto Pull and resetting its sync logs.
 */
class CLI {
    /**
     * Pull posts from external sites
     *
     * ## OPTIONS
     *
     * [--verbose]
     * : Show detailed output
     *
     * ## EXAMPLES
     *
     *     wp distributor-auto-pull pull
     *     wp distributor-auto-pull pull --verbose
     *
     * @param array $args       Command arguments
     * @param array $assoc_args Command associative arguments
     */
    public function pull( $args, $assoc_args ) {
        $verbose = ! empty( $assoc_args['verbose'] );

        \WP_CLI::log( 'Starting Distributor Auto Pull...' );

        $results = pull_posts();

        if ( $verbose ) {
            \WP_CLI::log(
                sprintf(
                    'Checked %d connections, pulled %d posts',
                    $results['connections_checked'],
                    $results['posts_pulled']
                )
            );

            if ( ! empty( $results['debug'] ) ) {
                \WP_CLI::log( 'Debug information:' );
                foreach ( $results['debug'] as $debug ) {
                    \WP_CLI::log( '- ' . $debug );
                }
            }
        }

        // Errors are reported in both modes; previously they were only shown
        // with --verbose, so a failed run looked like a clean success.
        if ( ! empty( $results['errors'] ) ) {
            \WP_CLI::warning( 'Errors encountered:' );
            foreach ( $results['errors'] as $error ) {
                \WP_CLI::log( '- ' . $error );
            }
        }

        if ( ! $verbose ) {
            \WP_CLI::success(
                sprintf(
                    'Pulled %d posts from %d connections',
                    $results['posts_pulled'],
                    $results['connections_checked']
                )
            );
        }
    }

    /**
     * Clear the sync log for a connection
     *
     * ## OPTIONS
     *
     * [<connection_id>]
     * : The ID of the connection to clear the sync log for. Required unless --all is given.
     *
     * [--all]
     * : Clear sync logs for all connections
     *
     * ## EXAMPLES
     *
     *     wp distributor-auto-pull clear-sync-log 506
     *     wp distributor-auto-pull clear-sync-log --all
     *
     * @subcommand clear-sync-log
     * @param array $args       Command arguments
     * @param array $assoc_args Command associative arguments
     */
    public function clear_sync_log( $args, $assoc_args ) {
        $clear_all = ! empty( $assoc_args['all'] );

        if ( $clear_all ) {
            // Covers the connections returned by get_external_connections().
            $connections = get_external_connections();

            if ( empty( $connections ) ) {
                \WP_CLI::error( 'No external connections found.' );
                return;
            }

            foreach ( $connections as $connection ) {
                delete_post_meta( $connection->id, 'dt_sync_log' );
                \WP_CLI::log( sprintf( 'Cleared sync log for connection %s', $connection->id ) );
            }

            \WP_CLI::success( sprintf( 'Cleared sync logs for %d connections', count( $connections ) ) );
            return;
        }

        // The positional argument is optional in the synopsis so that --all
        // works on its own; enforce it here for the single-connection form.
        if ( empty( $args[0] ) ) {
            \WP_CLI::error( 'Please provide a connection ID or use --all flag.' );
            return;
        }

        $connection_id = $args[0];
        $connection    = \Distributor\ExternalConnection::instantiate( $connection_id );

        if ( is_wp_error( $connection ) ) {
            \WP_CLI::error( sprintf( 'Invalid connection ID: %s', $connection_id ) );
            return;
        }

        delete_post_meta( $connection_id, 'dt_sync_log' );
        \WP_CLI::success( sprintf( 'Cleared sync log for connection %s', $connection_id ) );
    }
}
// When the plugin is deactivated, remove every scheduled occurrence of the
// auto-pull event so that it stops firing.
register_deactivation_hook(
    __FILE__,
    static function () {
        wp_clear_scheduled_hook( 'distributor_auto_pull_cron' );
    }
);
<?php
/**
* Plugin Name: Distributor Post Modifier
* Description: Modifies pulled posts to show excerpts and link to original sources.
* Version: 1.0.0
* Author: Automattic
* License: GPL v2 or later
* License URI: https://www.gnu.org/licenses/gpl-2.0.html
*/
namespace Automattic\DistributorPostModifier;
// Bail out when this file is requested outside of WordPress.
defined( 'ABSPATH' ) || exit;

// After Distributor pulls a post, adjust the newly created local copy.
add_action( 'dt_pull_post', __NAMESPACE__ . '\\modify_pulled_post', 10, 3 );

// Swap the permalink of distributed posts for the original source URL.
add_filter( 'post_link', __NAMESPACE__ . '\\modify_post_link', 10, 3 );
/**
 * Mark a freshly pulled post as a "link" format post.
 *
 * Only the post ID is used; the other two arguments are accepted to match
 * the three-argument hook registration and are ignored.
 *
 * @param int    $new_post_id The new post ID
 * @param object $connection  The connection object (unused)
 * @param array  $post_array  The original post data (unused)
 */
function modify_pulled_post( $new_post_id, $connection, $post_array ) {
    $format = 'link';

    set_post_format( $new_post_id, $format );
}
/**
 * Point a distributed post's permalink at its original source.
 *
 * Reads the `dt_original_post_url` meta value for the post. When it is
 * non-empty that value is returned in place of the permalink; otherwise the
 * permalink is passed through untouched.
 *
 * @param string  $permalink The post permalink
 * @param WP_Post $post      The post object
 * @param bool    $leavename Whether to leave the post name (unused)
 * @return string The original source URL when one is stored, else $permalink
 */
function modify_post_link( $permalink, $post, $leavename ) {
    $source_url = get_post_meta( $post->ID, 'dt_original_post_url', true );

    return empty( $source_url ) ? $permalink : $source_url;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment