Skip to content

Instantly share code, notes, and snippets.

@boris-glumpler
Created June 6, 2024 18:49
Show Gist options
  • Save boris-glumpler/d95e67f6aefe89804d2a842d7cef3922 to your computer and use it in GitHub Desktop.
Save boris-glumpler/d95e67f6aefe89804d2a842d7cef3922 to your computer and use it in GitHub Desktop.
ParadeDB pg_search bm25 index creation with Laravel/PostgreSQL
<?php
/**
* Fluent builder that creates and drops ParadeDB pg_search BM25 indexes through Laravel's DB facade.
* @see https://docs.paradedb.com/search/quickstart
*/
declare(strict_types=1);
namespace Database;
use stdClass;
use InvalidArgumentException;
use Illuminate\Support\Facades\DB;
/**
 * Fluent builder for ParadeDB BM25 indexes.
 *
 * Start with {@see Bm25::index()}, register the columns to index through the
 * magic `add<Type>Fields()` methods, then call {@see Bm25::create()}.
 * The index name is always "<table>_idx".
 *
 * @method static addNumericFields(array $config)
 * @method static addTextFields(array $config)
 * @method static addJsonFields(array $config)
 * @method static addBooleanFields(array $config)
 * @method static addDateFields(array $config)
 */
class Bm25
{
    /**
     * Field configuration grouped by field type.
     *
     * The keys are the only types accepted by the magic `add<Type>Fields()`
     * methods; the `date` bucket is sent as `datetime_fields` on creation.
     *
     * @var array<string, array<int|string, mixed>>
     */
    protected array $fields = [
        'text' => [],
        'numeric' => [],
        'boolean' => [],
        'json' => [],
        'date' => [],
    ];

    /**
     * Use {@see Bm25::index()} to obtain an instance.
     *
     * @param string $table  Table to index.
     * @param string $schema Schema the table lives in.
     * @param string $id     Column passed to ParadeDB as the key field.
     */
    protected function __construct(
        protected string $table,
        protected string $schema,
        protected string $id,
    ) {
    }

    /**
     * Start building an index definition for the given table.
     */
    public static function index(string $table, string $schema = 'public', string $id = 'id'): static
    {
        return new static($table, $schema, $id);
    }

    /**
     * Store the configuration for one field type.
     *
     * Note that a second call for the same type replaces the earlier
     * configuration rather than merging with it.
     *
     * @param string                   $name   One of the keys of {@see Bm25::$fields}.
     * @param array<int|string, mixed> $config Column names, optionally keyed by column with per-column options.
     *
     * @throws InvalidArgumentException When $name is not a known field type.
     */
    protected function addFields(string $name, array $config): static
    {
        if (! array_key_exists($name, $this->fields)) {
            throw new InvalidArgumentException("Field '$name' does not exist");
        }

        $this->fields[$name] = $config;

        return $this;
    }

    /**
     * Encode a field configuration as a JSON object keyed by column name.
     *
     * List entries (`['name']`) are shorthand for "column with default
     * options" and become `{"name": {}}`; keyed entries are kept as given.
     *
     * @param array<int|string, mixed> $config
     *
     * @throws InvalidArgumentException When a shorthand entry is not a column name.
     * @throws \JsonException           When the configuration cannot be encoded.
     */
    protected function encodeConfig(array $config): string
    {
        $encoded = [];

        foreach ($config as $key => $value) {
            if (! is_int($key)) {
                $encoded[$key] = $value;

                continue;
            }

            if (! is_string($value) && ! is_int($value)) {
                throw new InvalidArgumentException(
                    "Field configuration entry at index $key must be a column name"
                );
            }

            $encoded[$value] = new stdClass();
        }

        // Cast to object so the top level is always a JSON object, even when
        // a column name is a numeric string that PHP turned into a list index.
        return json_encode((object) $encoded, JSON_THROW_ON_ERROR);
    }

    /**
     * Create the BM25 index by calling `paradedb.create_bm25`.
     *
     * @param bool $drop When true, {@see Bm25::drop()} is called first.
     */
    public function create(bool $drop = false): void
    {
        if ($drop) {
            $this->drop();
        }

        $fields = array_map(
            fn (array $config): string => $config === [] ? '{}' : $this->encodeConfig($config),
            $this->fields,
        );

        // Every value is bound instead of interpolated, so quotes inside
        // identifiers or the JSON configuration cannot break the statement.
        DB::statement(
            <<<'QUERY'
            CALL paradedb.create_bm25(
                index_name => ?,
                schema_name => ?,
                table_name => ?,
                key_field => ?,
                text_fields => ?,
                numeric_fields => ?,
                boolean_fields => ?,
                json_fields => ?,
                datetime_fields => ?
            );
            QUERY,
            [
                "{$this->table}_idx",
                $this->schema,
                $this->table,
                $this->id,
                $fields['text'],
                $fields['numeric'],
                $fields['boolean'],
                $fields['json'],
                $fields['date'],
            ],
        );
    }

    /**
     * Drop the index by calling `paradedb.drop_bm25`.
     */
    public function drop(): void
    {
        DB::statement(
            <<<'QUERY'
            CALL paradedb.drop_bm25(
                index_name => ?,
                schema_name => ?
            );
            QUERY,
            [
                "{$this->table}_idx",
                $this->schema,
            ],
        );
    }

    /**
     * Route `add<Type>Fields($config)` calls to {@see Bm25::addFields()}.
     *
     * @param string            $method    Called method name, e.g. `addTextFields`.
     * @param array<int|string, mixed> $arguments Call arguments; the configuration is the
     *                                     first positional argument or the named argument `config`.
     *
     * @throws \BadMethodCallException  When $method is not of the form `add<Type>Fields`.
     * @throws InvalidArgumentException When the configuration is missing, not an array,
     *                                  or <Type> is not a known field type.
     */
    public function __call(string $method, array $arguments): static
    {
        // Anchored so that only exact add<Type>Fields names are accepted.
        if (preg_match('/^add([A-Za-z]+)Fields$/', $method, $matches) !== 1) {
            throw new \BadMethodCallException(
                sprintf('Call to undefined method %s::%s()', static::class, $method)
            );
        }

        $config = $arguments[0] ?? $arguments['config'] ?? null;

        if (! is_array($config)) {
            throw new InvalidArgumentException(
                "Method $method() expects an array of field configuration"
            );
        }

        return $this->addFields(strtolower($matches[1]), $config);
    }
}
<?php
/**
* And this is how you could use it
*/
declare(strict_types=1);
use Database\Bm25;
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class () extends Migration
{
    /**
     * Create the `teams` table and build its BM25 index.
     */
    public function up(): void
    {
        Schema::create('teams', static function (Blueprint $table): void {
            // table schema...
        });

        Bm25::index('teams')
            ->addNumericFields(['max_members'])
            ->addBooleanFields(['is_vip'])
            ->addDateFields(['created_at'])
            ->addTextFields([
                // List entry: column indexed with default options.
                'name',
                // Keyed entry: column with explicit options.
                'description' => [
                    'tokenizer' => [
                        'type' => 'default',
                    ],
                ],
            ])
            ->create(drop: true);
    }

    /**
     * Reverse the migration: drop the BM25 index, then the `teams` table.
     */
    public function down(): void
    {
        Bm25::index('teams')->drop();

        Schema::dropIfExists('teams');
    }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment