Skip to content

Instantly share code, notes, and snippets.

@gam6itko
Last active July 28, 2023 15:21
Show Gist options
  • Save gam6itko/b3b31332066beae851f9d2947832d24f to your computer and use it in GitHub Desktop.
Save gam6itko/b3b31332066beae851f9d2947832d24f to your computer and use it in GitHub Desktop.
php rdkafka batch messages processing

php-rdkafka batch messages processing

This example shows how to process Kafka messages in batches with php-rdkafka. As soon as the required number of messages is available in the topic partition, the script consumes them as a single batch.

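The key Kafka Conf value for this behavior is enable.auto.offset.store=false: it stops the client from storing offsets automatically, so the script stores the offset itself after a batch has been consumed. See here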

Execute

PARTITION=0
BATCH_SIZE=1000
php rdkafka-batch-processing.php $PARTITION $BATCH_SIZE

php-rdkafka docs

<?php

declare(strict_types=1);

/*
 * Consumes a single batch of messages from one partition of a Kafka topic.
 *
 * Usage: php rdkafka-batch-processing.php [partition] [batchSize]
 *
 * Flow:
 *   1. Read the next message at the stored offset to learn where consumption resumes.
 *   2. Read the last message of the partition to learn where the partition ends.
 *   3. If at least $batchSize messages lie between the two, consume them with
 *      consumeBatch() and store the offset of the last consumed message.
 *
 * Exit status: 0 when a batch was consumed or there are not enough messages yet,
 * 1 on invalid arguments or on a consume error/timeout.
 */

// arguments
$partition = (int) ($argv[1] ?? 0);
$batchSize = (int) ($argv[2] ?? 1000);
if ($partition < 0 || $batchSize < 1) {
    // A non-numeric argument casts to 0; reject it instead of asking for an empty batch.
    printf("Usage: php %s [partition >= 0] [batchSize >= 1]\n", $argv[0] ?? 'rdkafka-batch-processing.php');
    exit(1);
}
printf("Consume from partition: %d\n", $partition);

// configure
$conf = new RdKafka\Conf();
$conf->set('group.id', 'my_customer_group_id');
$conf->set('metadata.broker.list', 'kafka'); //docker container name
$conf->set('auto.offset.reset', 'earliest');
$conf->set('enable.partition.eof', 'true');
// Offsets are stored explicitly via offsetStore() below, never automatically on consume.
$conf->set('enable.auto.offset.store', 'false');

$consumer = new RdKafka\Consumer($conf);
$topic = $consumer->newTopic('my_topic_name');

// retrieve stored offset
$topic->consumeStart($partition, RD_KAFKA_OFFSET_STORED);
$currentMessage = $topic->consume($partition, 10_000);
if (null === $currentMessage) {
    // consume() yields null when nothing arrives within the timeout.
    echo "Message consume error: timed out while reading from the stored offset\n";
    $topic->consumeStop($partition);
    exit(1);
}
if (RD_KAFKA_RESP_ERR__PARTITION_EOF === $currentMessage->err) {
    // enable.partition.eof is on, so a fully consumed partition reports EOF: nothing is waiting.
    printf("\033[31mNot enough messages for batch: %d\033[0m\n", 0);
    $topic->consumeStop($partition);
    exit(0);
}
if (RD_KAFKA_RESP_ERR_NO_ERROR !== $currentMessage->err) {
    printf("Message consume error: %d\n", $currentMessage->err);
    $topic->consumeStop($partition);
    exit(1);
}
$currentMessageOffset = $currentMessage->offset;
unset($currentMessage);
printf("Current message offset: %d\n", $currentMessageOffset);
$topic->consumeStop($partition);

// retrieve last offset
$topic->consumeStart($partition, rd_kafka_offset_tail(1));
$lastMessage = $topic->consume($partition, 10_000);
if (null === $lastMessage) {
    echo "Message consume error: timed out while reading the last message\n";
    $topic->consumeStop($partition);
    exit(1);
}
if (RD_KAFKA_RESP_ERR_NO_ERROR !== $lastMessage->err) {
    printf("Message consume error: %d\n", $lastMessage->err);
    $topic->consumeStop($partition);
    exit(1);
}
$lastMessageOffset = $lastMessage->offset;
unset($lastMessage);
printf("Last message offset: %d\n", $lastMessageOffset);
$topic->consumeStop($partition);

// Both offsets point at real messages (first unconsumed and last available),
// so the range is inclusive on both ends: hence the +1.
$messagesBehindCnt = $lastMessageOffset - $currentMessageOffset + 1;
printf("Messages waiting for consumption: \033[32m%d\033[0m\n", $messagesBehindCnt);
if ($messagesBehindCnt < $batchSize) {
    printf("\033[31mNot enough messages for batch: %d\033[0m\n", $messagesBehindCnt);
    exit(0);
}

// consume batch
$topic->consumeStart($partition, RD_KAFKA_OFFSET_STORED);
$messagesList = $topic->consumeBatch($partition, 10_000, $batchSize);

// The batch may contain error events (e.g. partition EOF); only the offset of a
// successfully delivered message may be stored.
$lastConsumedOffset = null;
foreach ($messagesList as $message) {
    if (RD_KAFKA_RESP_ERR_NO_ERROR === $message->err) {
        $lastConsumedOffset = $message->offset;
    }
}

// Process $messagesList here, before the offset is stored.

if (null !== $lastConsumedOffset) {
    printf("Commit offset: \033[32m%d\033[0m\n", $lastConsumedOffset);
    $topic->offsetStore($partition, $lastConsumedOffset);
}
$topic->consumeStop($partition);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment