PHP の文字列処理に潜ってみる修行
PHP のシングルクォート、ダブルクォート、sprintf による文字列組み立ての処理時間が、だいたい 1:4:2 になるという実測値を見たので、実際に何をしているのかちょっと見てみることに。ソースコードから追うのは止め、ltrace で呼ばれる関数を調べてみた。
<?php
// Builds the same sentence three ways so the library calls made by each
// approach can be compared under ltrace.
$name = 'Tarou Yamada';
$food = 'Hamburger';
$i = 0;

// The touch() calls are markers only: they make it possible to find the
// boundaries between the three measured sections in the ltrace output.
touch('tmp');

// 1) Single-quoted literals joined with the concatenation operator.
$test = $i.') My name is '.$name.'. I love '.$food.' very much.';

touch('tmp2');

// 2) Double-quoted string with variable interpolation.
$test = "$i) My name is $name. I love $food very much.";

touch('tmp3');

// 3) sprintf() with a single-quoted format string.
//    (A stray ';' after the format string was removed; it was a parse error.)
$test = sprintf('%s) My name is %s. I love %s very much.', $i, $name, $food);

touch('tmp4');
?>
という test.php に対して、ltrace php test.php を実行。touch は各実行部分の特定のために挿入。
字句解析や構文木作成のオーバヘッドは無視して、とりあえず実行部分らしいところだけをピックアップ。
シングルクォート(. による連結)
malloc(36) = 0x9478238 sprintf("0", "%ld", 0) = 1 memcpy(0x946953c, "0", 1) = 0x946953c memcpy(0x946953d, ") My name is ", 13) = 0x946953d memcpy(0x947874c, "0) My name is ", 14) = 0x947874c memcpy(0x947875a, "Tarou Yamada", 12) = 0x947875a malloc(52) = 0x9478260 memcpy(0x947826c, "0) My name is Tarou Yamada", 26) = 0x947826c memcpy(0x9478286, ". I love ", 9) = 0x9478286 memcpy(0x9473394, "0) My name is Tarou Yamada. I lo"..., 35) = 0x9473394 memcpy(0x94733b7, "Hamburger", 9) = 0x94733b7 malloc(68) = 0x9478298 memcpy(0x94782a4, "0) My name is Tarou Yamada. I lo"..., 44) = 0x94782a4 memcpy(0x94782d0, " very much.", 11) = 0x94782d0 memcpy(0x947828c, "test", 5) = 0x947828c
ダブルクォート
sprintf("0", "%ld", 0) = 1 realloc(0x9478ad8, 20) = 0x9478ad8 memcpy(0x9478ae4, "0", 1) = 0x9478ae4 realloc(0x9478ad8, 20) = 0x9478ad8 memcpy(0x9478ae5, ") ", 2) = 0x9478ae5 realloc(0x9478ad8, 20) = 0x9478ad8 memcpy(0x9478ae7, "My", 2) = 0x9478ae7 realloc(0x9478ad8, 20) = 0x9478ad8 memcpy(0x9478ae9, " ", 1) = 0x9478ae9 realloc(0x9478ad8, 28) = 0x94782e0 memcpy(0x94782f2, "name", 4) = 0x94782f2 realloc(0x94782e0, 28) = 0x94782e0 memcpy(0x94782f6, " ", 1) = 0x94782f6 realloc(0x94782e0, 28) = 0x94782e0 memcpy(0x94782f7, "is", 2) = 0x94782f7 realloc(0x94782e0, 28) = 0x94782e0 memcpy(0x94782f9, " ", 1) = 0x94782f9 realloc(0x94782e0, 44) = 0x94782e0 memcpy(0x94782fa, "Tarou Yamada", 12) = 0x94782fa realloc(0x94782e0, 44) = 0x94782e0 memcpy(0x9478306, ". ", 2) = 0x9478306 realloc(0x94782e0, 44) = 0x94782e0 memcpy(0x9478308, "I", 1) = 0x9478308 realloc(0x94782e0, 44) = 0x94782e0 memcpy(0x9478309, " ", 1) = 0x9478309 realloc(0x94782e0, 52) = 0x94782e0 memcpy(0x947830a, "love", 4) = 0x947830a realloc(0x94782e0, 52) = 0x94782e0 memcpy(0x947830e, " ", 1) = 0x947830e realloc(0x94782e0, 60) = 0x94782e0 memcpy(0x947830f, "Hamburger", 9) = 0x947830f realloc(0x94782e0, 60) = 0x94782e0 memcpy(0x9478318, " ", 1) = 0x9478318 realloc(0x94782e0, 68) = 0x94782e0 memcpy(0x9478319, "very", 4) = 0x9478319 realloc(0x94782e0, 68) = 0x94782e0 memcpy(0x947831d, " ", 1) = 0x947831d realloc(0x94782e0, 68) = 0x94782e0 memcpy(0x947831e, "much", 4) = 0x947831e realloc(0x94782e0, 68) = 0x94782e0 memcpy(0x9478322, ".", 1) = 0x9478322
sprintf
malloc(52) = 0x9478328 memcpy(0x9478334, "%s) My name is %s. I love %s ver"..., 39) = 0x9478334 malloc(28) = 0x9478360 malloc(252) = 0x9478380 __ctype_b_loc() = 0x19824c malloc(28) = 0x9478480 sprintf("0", "%ld", 0) = 1 memcpy(0x947838c, "0", 2) = 0x947838c __ctype_b_loc() = 0x19824c malloc(28) = 0x94784a0 malloc(28) = 0x94784c0 memcpy(0x94784cc, "Tarou Yamada", 12) = 0x94784cc memcpy(0x947839a, "Tarou Yamada", 13) = 0x947839a __ctype_b_loc() = 0x19824c malloc(28) = 0x94784e0 malloc(28) = 0x9478500 memcpy(0x947850c, "Hamburger", 9) = 0x947850c memcpy(0x94783af, "Hamburger", 10) = 0x94783af
これだけ見ると、ダブルクォートが遅いのは realloc のせいだろうか。realloc は遅そうな気がするし、それを小刻みに、しかも同じサイズのまま 2〜4 度ずつ呼んでいる。
単純に行数だけ比べても 15, 41, 18。実測値 1:4:2 に近いと言えるだろうか。