Skip to content

Instantly share code, notes, and snippets.

@masakielastic
Last active July 9, 2025 02:37
Show Gist options
  • Save masakielastic/bf3fe9590de636a110ea4285505a6475 to your computer and use it in GitHub Desktop.
Save masakielastic/bf3fe9590de636a110ea4285505a6475 to your computer and use it in GitHub Desktop.
str_iter のベンチマーク

str_iter のベンチマーク

https://github.com/masakielastic/php-ext-striter

str_iter の実装には PCRE2 JIT が使われています。

php -d extension=modules/striter.so benchmark.php

[ASCII]
grapheme_*                    :   0.003695 sec, mem: 0 bytes
IntlBreakIterator             :   0.161382 sec, mem: 0 bytes
preg_match_all によるユーザー関数:   0.003848 sec, mem: 0 bytes
preg_match によるユーザー関数:   0.006448 sec, mem: 0 bytes
str_iter拡張                :   0.012894 sec, mem: 0 bytes

[日本語]
grapheme_*                    :   0.120388 sec, mem: 0 bytes
IntlBreakIterator             :   0.156671 sec, mem: 0 bytes
preg_match_all によるユーザー関数:   0.009735 sec, mem: 0 bytes
preg_match によるユーザー関数:   0.020445 sec, mem: 0 bytes
str_iter拡張                :   0.115297 sec, mem: 0 bytes

[絵文字]
grapheme_*                    :   0.064137 sec, mem: 0 bytes
IntlBreakIterator             :   0.153531 sec, mem: 0 bytes
preg_match_all によるユーザー関数:   0.003629 sec, mem: 0 bytes
preg_match によるユーザー関数:   0.004293 sec, mem: 0 bytes
str_iter拡張                :   0.008510 sec, mem: 0 bytes

[混在]
grapheme_*                    :   0.091253 sec, mem: 0 bytes
IntlBreakIterator             :   0.162230 sec, mem: 0 bytes
preg_match_all によるユーザー関数:   0.007549 sec, mem: 0 bytes
preg_match によるユーザー関数:   0.016544 sec, mem: 0 bytes
str_iter拡張                :   0.063441 sec, mem: 0 bytes
<?php
/**
 * Walks a string one grapheme cluster at a time with the intl grapheme_* functions.
 *
 * The cluster count comes from grapheme_strlen(); each cluster is then fetched
 * by index with grapheme_substr() and appended to a local array. The array is
 * discarded when the function returns, so nothing is returned to the caller.
 *
 * @param string $str Text to iterate over.
 */
function foreach_grapheme($str) {
    $collected = [];
    $total = grapheme_strlen($str);
    $index = 0;
    while ($index < $total) {
        // Stand-in workload: keep the extracted cluster.
        $cluster = grapheme_substr($str, $index, 1);
        $collected[] = $cluster;
        $index++;
    }
}
/**
 * Walks a string one grapheme cluster at a time with IntlBreakIterator.
 *
 * Iterating the break iterator yields boundary offsets; the text between two
 * consecutive boundaries is cut out with substr() and appended to a local
 * array. The array is discarded on return, so nothing is returned.
 *
 * @param string $str Text to iterate over.
 */
function foreach_intl($str) {
    // createCharacterInstance() expects a locale identifier, not a character
    // encoding, so no argument is passed and the default locale is used.
    $bi = IntlBreakIterator::createCharacterInstance();
    $bi->setText($str);
    $chars = [];
    $prev = 0;
    foreach ($bi as $pos) {
        // Only slice when the boundary actually advanced past the previous one
        // (this skips a boundary reported at offset 0).
        if ($pos > $prev) {
            $char = substr($str, $prev, $pos - $prev);
            $chars[] = $char; // Stand-in workload: keep the extracted cluster.
            $prev = $pos;
        }
    }
}
/**
 * Walks a string one grapheme cluster at a time using a single preg_match_all() call.
 *
 * The \X pattern (with the u modifier) collects every extended grapheme
 * cluster up front; the matches are then copied one by one into a local array
 * that is discarded on return. Nothing is returned.
 *
 * @param string $str Text to iterate over.
 */
function foreach_pcre($str) {
    $collected = [];
    preg_match_all('/\X/u', $str, $matches);
    foreach ($matches[0] as $cluster) {
        // Stand-in workload: keep the matched cluster.
        $collected[] = $cluster;
    }
}
/**
 * Walks a string one grapheme cluster at a time using repeated preg_match() calls.
 *
 * Starting at byte offset 0, each call matches one \X cluster from the current
 * offset; the offset is then advanced by the byte length of the match. The
 * loop ends when the offset reaches the end of the string or a match attempt
 * does not succeed. Matches go into a local array that is discarded on return.
 *
 * @param string $str Text to iterate over.
 */
function foreach_pregmatch($str) {
    $collected = [];
    $total = strlen($str);
    $cursor = 0;
    while ($cursor < $total) {
        if (!preg_match('/\X/u', $str, $hit, 0, $cursor)) {
            // No match (or a PCRE failure): stop instead of looping forever.
            break;
        }
        // Stand-in workload: keep the matched cluster.
        $collected[] = $hit[0];
        $cursor += strlen($hit[0]);
    }
}
/**
 * Walks a string one grapheme cluster at a time using the str_iter extension.
 *
 * Every value produced by str_iter($str, 'grapheme') is appended to a local
 * array that is discarded on return. Nothing is returned.
 *
 * @param string $str Text to iterate over.
 */
function foreach_striter($str) {
    $collected = [];
    foreach (str_iter($str, 'grapheme') as $cluster) {
        // Stand-in workload: keep the yielded cluster.
        $collected[] = $cluster;
    }
}
/**
 * Runs a callable repeatedly and prints one line with the elapsed time and
 * the change in memory usage.
 *
 * The printed memory figure is the difference in memory_get_usage() before
 * and after the loop, i.e. memory still held once all iterations finished;
 * allocations that were released during the run do not show up in it.
 *
 * @param callable $fn     Zero-argument callable to benchmark.
 * @param string   $label  Name printed at the start of the result line.
 * @param int      $repeat Number of times $fn is invoked (default 10000).
 */
function bench($fn, $label, $repeat = 10000) {
    $mem_start = memory_get_usage();
    // hrtime() is a monotonic high-resolution clock, so the measurement is
    // not affected by wall-clock adjustments the way microtime() is. The
    // timed window wraps only the loop, not the memory bookkeeping.
    $start = hrtime(true);
    for ($i = 0; $i < $repeat; $i++) {
        $fn();
    }
    // hrtime(true) reports nanoseconds; convert to seconds for printing.
    $elapsed = (hrtime(true) - $start) / 1e9;
    $mem_end = memory_get_usage();
    printf("%-30s: %10.6f sec, mem: %d bytes\n", $label, $elapsed, $mem_end - $mem_start);
}
// Sample inputs, keyed by the category name printed as the section header.
$inputs = [
    "ASCII" => "abcde",
    "日本語" => "あいうえお漢字カタカナひらがな",
    "絵文字" => "😀🍣🍺🏳️‍🌈👨‍👩‍👧‍👦🇯🇵",
    "混在" => "abcあいう🍺123👩‍🔬",
];

// For every sample, benchmark each iteration strategy in a fixed order and
// print the results under a "[category]" header followed by a blank line.
foreach ($inputs as $category => $sample) {
    echo "[{$category}]\n";
    $cases = [
        "grapheme_*" => fn() => foreach_grapheme($sample),
        "IntlBreakIterator" => fn() => foreach_intl($sample),
        "preg_match_all によるユーザー関数" => fn() => foreach_pcre($sample),
        "preg_match によるユーザー関数" => fn() => foreach_pregmatch($sample),
        "str_iter拡張" => fn() => foreach_striter($sample),
    ];
    foreach ($cases as $title => $runner) {
        bench($runner, $title);
    }
    echo "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment