Created
June 11, 2025 09:15
-
-
Save DevWael/ca178a99d4c1370f43a891c039d77f09 to your computer and use it in GitHub Desktop.
File-based vector storage using sharding with BINARY storage for vectors.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Class SimpleShardedVectorStoreBinary
 *
 * File-based vector storage using sharding with BINARY storage for vectors.
 * Includes:
 * - Binary format for shards (potentially faster I/O & parsing, smaller files).
 * - Optional vector normalization for faster search.
 * - Pre-calculated magnitudes stored.
 * - Transactional writes for shards (temp file + rename).
 * - Metadata filtering during search.
 * - Batch adding of vectors.
 *
 * On-disk record layout (records are concatenated back to back in a shard):
 *   uint16 (big-endian)  ID length in bytes
 *   bytes                ID
 *   uint16 (big-endian)  vector dimension
 *   float[dimension]     vector components (VECTOR_PACK_FORMAT, machine byte order)
 *   float                magnitude (MAGNITUDE_PACK_FORMAT, machine byte order)
 *   uint8                metadata type (METADATA_TYPE_*)
 *   -- only when the metadata type is METADATA_TYPE_JSON_STRING --
 *   uint32 (big-endian)  metadata length in bytes
 *   bytes                metadata as a JSON string
 *
 * LIMITATIONS:
 * - Search is still a LINEAR SCAN across all vectors. No true ANN indexing.
 * - Concurrency, error recovery, advanced DB features are not comparable to dedicated vector DBs.
 * - IDs are limited to 65535 bytes and vectors to 65535 dimensions by the uint16 length fields.
 */
class SimpleShardedVectorStoreBinary {

    private string $storageDir;
    private int $numShards;
    private bool $normalizeVectors;

    private const SHARD_PREFIX  = 'vector_shard_bin_'; // Indicate binary
    private const SHARD_SUFFIX  = '.dat'; // Data file
    private const TEMP_SUFFIX   = '.tmp';
    private const FLOAT_EPSILON = 1e-9;

    // Binary format constants (adjust if needed)
    // 'f' = single-precision float (4 bytes). 'd' = double-precision float (8 bytes).
    // Most embeddings are float32, so 'f' is usually appropriate.
    private const VECTOR_PACK_FORMAT    = 'f'; // For individual vector elements
    private const MAGNITUDE_PACK_FORMAT = 'f'; // For storing the magnitude

    // Largest values representable by the length fields of the record layout.
    private const MAX_UINT16 = 0xFFFF;
    private const MAX_UINT32 = 0xFFFFFFFF;

    // Metadata types for binary storage
    private const METADATA_TYPE_NULL        = 0;
    private const METADATA_TYPE_JSON_STRING = 1; // Store metadata as a JSON string

    /**
     * @param string $storageDir       Directory holding the shard files; created if missing.
     * @param int    $numShards        Number of shard files IDs are distributed over (must be > 0).
     * @param bool   $normalizeVectors Whether vectors are scaled to unit length before being stored.
     *
     * @throws \InvalidArgumentException If $numShards is not positive.
     * @throws \RuntimeException         If the directory cannot be created or is not writable.
     */
    public function __construct( string $storageDir, int $numShards = 10, bool $normalizeVectors = true ) {
        if ( $numShards <= 0 ) {
            throw new \InvalidArgumentException( "Number of shards must be positive." );
        }
        $this->storageDir       = rtrim( $storageDir, '/\\' );
        $this->numShards        = $numShards;
        $this->normalizeVectors = $normalizeVectors;

        // The trailing is_dir() re-check tolerates another process creating the
        // directory between our first check and the mkdir() call.
        if ( ! is_dir( $this->storageDir )
            && ! mkdir( $this->storageDir, 0777, true )
            && ! is_dir( $this->storageDir )
        ) {
            throw new \RuntimeException( "Failed to create storage directory: {$this->storageDir}" );
        }
        if ( ! is_writable( $this->storageDir ) ) {
            throw new \RuntimeException( "Storage directory is not writable: {$this->storageDir}" );
        }
    }

    /**
     * Maps an ID to its shard via CRC32 modulo the shard count.
     */
    private function getShardIndex( string $id ): int {
        return abs( crc32( $id ) ) % $this->numShards;
    }

    /**
     * Builds the path of the data file for a shard.
     */
    private function getShardPath( int $shardIndex ): string {
        return $this->storageDir . '/' . self::SHARD_PREFIX . $shardIndex . self::SHARD_SUFFIX;
    }

    /**
     * Returns the number of bytes a single value occupies for a pack() float format code.
     */
    private function packedSize( string $format ): int {
        return strlen( pack( $format, 0.0 ) );
    }

    /**
     * Loads data from a binary shard file.
     *
     * Parsing stops at the first truncated record; records read up to that point
     * are returned and an E_USER_WARNING reports the unread remainder.
     *
     * @param int $shardIndex
     *
     * @return array List of items: ['id', 'vector', 'dimension', 'magnitude', 'metadata'].
     * @throws \RuntimeException If the file exists but cannot be read.
     */
    private function loadShard( int $shardIndex ): array {
        $filePath = $this->getShardPath( $shardIndex );
        if ( ! file_exists( $filePath ) ) {
            return array();
        }
        $binaryContent = file_get_contents( $filePath );
        if ( $binaryContent === false ) {
            throw new \RuntimeException( "Failed to read shard file: {$filePath}" );
        }
        $contentLength = strlen( $binaryContent );
        if ( $contentLength === 0 ) {
            return array();
        }

        $items = array();
        $offset = 0;
        // Vector elements and the magnitude may use different formats, so size each separately.
        $elementSize   = $this->packedSize( self::VECTOR_PACK_FORMAT );
        $magnitudeSize = $this->packedSize( self::MAGNITUDE_PACK_FORMAT );

        while ( $offset < $contentLength ) {
            // 1. ID Length (uint16)
            if ( $offset + 2 > $contentLength ) {
                break;
            }
            $idLength = unpack( 'n', substr( $binaryContent, $offset, 2 ) )[1];
            $offset  += 2;

            // 2. ID (string)
            if ( $offset + $idLength > $contentLength ) {
                break;
            }
            $id      = (string) substr( $binaryContent, $offset, $idLength );
            $offset += $idLength;

            // 3. Vector Dimension (uint16)
            if ( $offset + 2 > $contentLength ) {
                break;
            }
            $dimension = unpack( 'n', substr( $binaryContent, $offset, 2 ) )[1];
            $offset   += 2;

            // 4. Vector Data (array of floats)
            $vectorByteLength = $dimension * $elementSize;
            if ( $offset + $vectorByteLength > $contentLength ) {
                break;
            }
            $vector = array();
            if ( $dimension > 0 ) {
                $unpacked = unpack( self::VECTOR_PACK_FORMAT . $dimension, substr( $binaryContent, $offset, $vectorByteLength ) );
                if ( $unpacked === false ) {
                    break;
                }
                $vector = array_values( $unpacked ); // unpack() keys start at 1; re-index from 0
            }
            $offset += $vectorByteLength;

            // 5. Magnitude (float/double)
            if ( $offset + $magnitudeSize > $contentLength ) {
                break;
            }
            $magnitude = unpack( self::MAGNITUDE_PACK_FORMAT, substr( $binaryContent, $offset, $magnitudeSize ) )[1];
            $offset   += $magnitudeSize;

            // 6. Metadata Type (byte)
            if ( $offset + 1 > $contentLength ) {
                break;
            }
            $metadataType = unpack( 'C', substr( $binaryContent, $offset, 1 ) )[1];
            $offset      += 1;

            $metadata = null;
            if ( $metadataType === self::METADATA_TYPE_JSON_STRING ) {
                // 7. Metadata Length (uint32)
                if ( $offset + 4 > $contentLength ) {
                    break;
                }
                $metadataLength = unpack( 'N', substr( $binaryContent, $offset, 4 ) )[1];
                $offset        += 4;

                // 8. Metadata (JSON string)
                if ( $offset + $metadataLength > $contentLength ) {
                    break;
                }
                $metadataJson = (string) substr( $binaryContent, $offset, $metadataLength );
                $offset      += $metadataLength;

                $metadata = json_decode( $metadataJson, true );
                if ( json_last_error() !== JSON_ERROR_NONE ) {
                    $decodeError = json_last_error_msg();
                    trigger_error( "Failed to decode metadata JSON for ID '{$id}' in shard {$shardIndex}: " . $decodeError, E_USER_WARNING );
                    $metadata = array( '_parsing_error' => $decodeError ); // Store error instead of failing all
                }
            }

            $items[] = array(
                'id'        => $id,
                'vector'    => $vector,
                'dimension' => $dimension, // Store dimension for consistency checks
                'magnitude' => $magnitude,
                'metadata'  => $metadata,
            );
        }

        if ( $offset !== $contentLength ) {
            trigger_error( "Shard file {$filePath} may be corrupt or incompletely read. Read {$offset} of {$contentLength} bytes.", E_USER_WARNING );
        }

        return $items;
    }

    /**
     * Serializes one item into its binary record (see the class docblock for the layout).
     *
     * @param array $item Item with 'id', 'vector', 'magnitude' and 'metadata' keys.
     *
     * @return string The packed record.
     * @throws \RuntimeException If a length does not fit its fixed-width field; writing it
     *                           anyway would silently truncate the length and corrupt the shard.
     */
    private function encodeItem( array $item ): string {
        // 1. ID
        $id = (string) ( $item['id'] ?? '' );
        if ( strlen( $id ) > self::MAX_UINT16 ) {
            throw new \RuntimeException( 'ID exceeds the maximum length of ' . self::MAX_UINT16 . ' bytes.' );
        }
        $record = pack( 'n', strlen( $id ) ) . $id;

        // 2. Vector
        // array_values() guarantees positional arguments for the splat below;
        // string keys would otherwise be treated as named arguments on PHP 8.1+.
        $vector    = array_values( (array) ( $item['vector'] ?? array() ) );
        $dimension = count( $vector );
        if ( $dimension > self::MAX_UINT16 ) {
            throw new \RuntimeException( "Vector for ID '{$id}' exceeds the maximum dimension of " . self::MAX_UINT16 . '.' );
        }
        $record .= pack( 'n', $dimension );
        if ( $dimension > 0 ) {
            $record .= pack( self::VECTOR_PACK_FORMAT . '*', ...$vector );
        }

        // 3. Magnitude
        $record .= pack( self::MAGNITUDE_PACK_FORMAT, (float) ( $item['magnitude'] ?? 0.0 ) );

        // 4. Metadata
        $metadata = $item['metadata'] ?? null;
        if ( $metadata === null ) {
            return $record . pack( 'C', self::METADATA_TYPE_NULL );
        }

        $metadataJson = json_encode( $metadata );
        if ( $metadataJson === false ) {
            $encodeError = json_last_error_msg();
            trigger_error( "Failed to JSON encode metadata for ID '{$id}': " . $encodeError, E_USER_WARNING );
            // Encode the fallback properly so the stored placeholder is always valid JSON.
            $metadataJson = (string) json_encode( array( '_encoding_error' => $encodeError ) );
        }
        if ( strlen( $metadataJson ) > self::MAX_UINT32 ) {
            throw new \RuntimeException( "Metadata for ID '{$id}' is too large to store." );
        }

        return $record
            . pack( 'C', self::METADATA_TYPE_JSON_STRING )
            . pack( 'N', strlen( $metadataJson ) )
            . $metadataJson;
    }

    /**
     * Saves data to a binary shard file using transactional writes.
     *
     * The shard is fully encoded in memory, written to a uniquely named temp
     * file, then renamed over the shard file so readers never see a partial write.
     *
     * @param int   $shardIndex
     * @param array $shardData List of items as produced by loadShard().
     *
     * @throws \RuntimeException If encoding or saving fails.
     */
    private function saveShard( int $shardIndex, array $shardData ): void {
        $shardPath = $this->getShardPath( $shardIndex );
        $tempPath  = $this->storageDir . '/' . self::SHARD_PREFIX . $shardIndex . uniqid( '_temp_', true ) . self::TEMP_SUFFIX;

        // Encode everything before touching the disk: an encoding failure then leaves no temp file behind.
        $binaryOutput = '';
        foreach ( $shardData as $item ) {
            $binaryOutput .= $this->encodeItem( $item );
        }

        $bytesWritten = file_put_contents( $tempPath, $binaryOutput );
        if ( $bytesWritten === false || $bytesWritten !== strlen( $binaryOutput ) ) {
            if ( file_exists( $tempPath ) ) {
                @unlink( $tempPath );
            }
            throw new \RuntimeException( "Failed to write to temporary shard file: {$tempPath}" );
        }
        if ( ! rename( $tempPath, $shardPath ) ) {
            if ( file_exists( $tempPath ) ) {
                @unlink( $tempPath );
            }
            throw new \RuntimeException( "Failed to rename temporary shard file '{$tempPath}' to '{$shardPath}'." );
        }
    }

    /**
     * Validates a vector and converts it to a zero-indexed list of floats.
     *
     * @param array $vector Candidate vector.
     *
     * @return array|null List of finite floats, or null when the vector is empty, has more
     *                    than 65535 components, or contains a non-numeric / non-finite value.
     */
    private function sanitizeVector( array $vector ): ?array {
        if ( $vector === array() || count( $vector ) > self::MAX_UINT16 ) {
            return null;
        }
        $values = array();
        foreach ( $vector as $component ) {
            if ( ! is_numeric( $component ) ) {
                return null;
            }
            $component = (float) $component;
            // is_numeric() accepts NAN and INF floats; they would poison every similarity score.
            if ( ! is_finite( $component ) ) {
                return null;
            }
            $values[] = $component;
        }

        return $values;
    }

    /**
     * Validates input and builds the item array that is stored for a vector.
     *
     * @param string $id
     * @param array  $vector
     * @param mixed  $metadata Any JSON-encodable value, or null for none.
     *
     * @return array|null The item to store, or null when the ID or vector is invalid.
     */
    private function prepareItem( string $id, array $vector, $metadata ): ?array {
        if ( strlen( $id ) > self::MAX_UINT16 ) {
            return null;
        }
        $values = $this->sanitizeVector( $vector );
        if ( $values === null ) {
            return null;
        }
        $magnitude = $this->magnitude( $values );
        if ( ! is_finite( $magnitude ) ) {
            return null; // Sum of squares overflowed
        }

        if ( $this->normalizeVectors ) {
            if ( $magnitude > self::FLOAT_EPSILON ) {
                $values    = $this->normalizeVector( $values, $magnitude );
                $magnitude = 1.0;
            } else {
                // (Near-)zero vectors cannot be normalized; a zero magnitude makes search skip them.
                $magnitude = 0.0;
            }
        }

        return array(
            'id'        => $id,
            'vector'    => $values,
            'dimension' => count( $values ),
            'magnitude' => $magnitude,
            'metadata'  => $metadata,
        );
    }

    /**
     * Adds a single vector.
     *
     * @param string $id       Unique ID (at most 65535 bytes).
     * @param array  $vector   Non-empty array of finite numeric values (at most 65535 components).
     * @param mixed  $metadata Any JSON-encodable value, or null for none.
     *
     * @return bool True when stored; false when the input is invalid or the ID already exists.
     * @throws \RuntimeException If the shard cannot be read or written.
     */
    public function addVector( string $id, array $vector, $metadata = null ): bool {
        $newItem = $this->prepareItem( $id, $vector, $metadata );
        if ( $newItem === null ) {
            return false;
        }

        $shardIndex = $this->getShardIndex( $id );
        $shardData  = $this->loadShard( $shardIndex );
        foreach ( $shardData as $item ) {
            if ( $item['id'] === $id ) {
                return false; // ID already exists
            }
        }

        $shardData[] = $newItem;
        $this->saveShard( $shardIndex, $shardData );

        return true;
    }

    /**
     * Adds multiple vectors in a batch. More efficient than single adds for large amounts,
     * because each affected shard is loaded and rewritten only once.
     *
     * @param array $vectorsData Array of ['id' => ..., 'vector' => ..., 'metadata' => ...]
     *
     * @return array ['succeeded' => count, 'failed_ids' => [...], 'duplicate_ids' => [...]]
     * @throws \RuntimeException If an existing shard cannot be read.
     */
    public function addVectorsBatch( array $vectorsData ): array {
        $results      = array( 'succeeded' => 0, 'failed_ids' => array(), 'duplicate_ids' => array() );
        $itemsByShard = array();

        // Validate and group vectors by shard
        foreach ( $vectorsData as $vData ) {
            $rawId = is_array( $vData ) ? ( $vData['id'] ?? null ) : null;
            if ( ! is_scalar( $rawId ) || ! isset( $vData['vector'] ) || ! is_array( $vData['vector'] ) ) {
                $results['failed_ids'][] = $rawId ?? 'unknown_id_missing_vector';
                continue;
            }
            // Work with the string form throughout so comparisons against stored IDs are exact.
            $id      = (string) $rawId;
            $newItem = $this->prepareItem( $id, $vData['vector'], $vData['metadata'] ?? null );
            if ( $newItem === null ) {
                $results['failed_ids'][] = $rawId;
                continue;
            }
            $itemsByShard[ $this->getShardIndex( $id ) ][] = $newItem;
        }

        // Process each shard
        foreach ( $itemsByShard as $shardIndex => $candidates ) {
            $currentShardData = $this->loadShard( $shardIndex );

            // Hash set of known IDs: exact-match, O(1) lookups. A loose in_array() would
            // wrongly treat numeric-looking IDs such as '1000' and '1e3' as equal.
            $knownIds = array();
            foreach ( $currentShardData as $existing ) {
                $knownIds[ $existing['id'] ] = true;
            }

            $vectorsToAddThisShard = array();
            foreach ( $candidates as $candidate ) {
                if ( isset( $knownIds[ $candidate['id'] ] ) ) {
                    $results['duplicate_ids'][] = $candidate['id'];
                    continue;
                }
                $vectorsToAddThisShard[]      = $candidate;
                $knownIds[ $candidate['id'] ] = true; // Prevent adding same ID twice in one batch
            }

            if ( $vectorsToAddThisShard === array() ) {
                continue;
            }
            try {
                $this->saveShard( $shardIndex, array_merge( $currentShardData, $vectorsToAddThisShard ) );
                $results['succeeded'] += count( $vectorsToAddThisShard );
            } catch ( \Exception $e ) {
                trigger_error( "Batch add failed for shard {$shardIndex}: " . $e->getMessage(), E_USER_WARNING );
                foreach ( $vectorsToAddThisShard as $failedVec ) {
                    $results['failed_ids'][] = $failedVec['id'];
                }
            }
        }

        return $results;
    }

    /**
     * Removes the vector with the given ID.
     *
     * @return bool True when a vector was removed, false when the ID was not found.
     * @throws \RuntimeException If the shard cannot be read or written.
     */
    public function removeVector( string $id ): bool {
        $shardIndex = $this->getShardIndex( $id );
        $shardData  = $this->loadShard( $shardIndex );
        $remaining  = array_values( array_filter( $shardData, fn( $item ) => $item['id'] !== $id ) );
        if ( count( $remaining ) === count( $shardData ) ) {
            return false;
        }
        $this->saveShard( $shardIndex, $remaining );

        return true;
    }

    /**
     * Fetches a stored item by ID.
     *
     * @return array|null ['id', 'vector', 'dimension', 'magnitude', 'metadata'], or null when absent.
     * @throws \RuntimeException If the shard cannot be read.
     */
    public function getVectorById( string $id ): ?array {
        foreach ( $this->loadShard( $this->getShardIndex( $id ) ) as $item ) {
            if ( $item['id'] === $id ) {
                return $item;
            }
        }

        return null;
    }

    /**
     * Finds the stored vectors most similar to the query by cosine similarity (linear scan).
     *
     * @param array               $queryVector    Non-empty array of finite numeric values.
     * @param int                 $topK           Maximum number of results; values <= 0 yield no results.
     * @param callable|array|null $metadataFilter Callable receiving an item's metadata and returning
     *                                            whether to keep it, or an array of key => value pairs
     *                                            that must all match strictly; null disables filtering.
     *
     * @return array List of ['id', 'metadata', 'score'] sorted by descending score.
     * @throws \RuntimeException If a shard cannot be read.
     */
    public function findSimilar( array $queryVector, int $topK = 5, $metadataFilter = null ): array {
        if ( $topK <= 0 ) {
            // A negative length would make array_slice() return "all but the last K".
            return array();
        }
        $query = $this->sanitizeVector( $queryVector );
        if ( $query === null ) {
            return array();
        }
        $queryMagnitude = $this->magnitude( $query );
        if ( ! is_finite( $queryMagnitude ) || $queryMagnitude < self::FLOAT_EPSILON ) {
            return array();
        }
        $normalizedQueryVector = $this->normalizeVector( $query, $queryMagnitude );
        $queryDimension        = count( $query );

        $filterFn = null;
        if ( is_callable( $metadataFilter ) ) {
            $filterFn = $metadataFilter;
        } elseif ( is_array( $metadataFilter ) && ! empty( $metadataFilter ) ) {
            $filterFn = static function ( $metadata ) use ( $metadataFilter ): bool {
                if ( ! is_array( $metadata ) ) {
                    return false;
                }
                foreach ( $metadataFilter as $key => $value ) {
                    // array_key_exists() rather than isset(), so a filter value of null can match.
                    if ( ! array_key_exists( $key, $metadata ) || $metadata[ $key ] !== $value ) {
                        return false;
                    }
                }

                return true;
            };
        }

        $allResults = array();
        for ( $i = 0; $i < $this->numShards; $i ++ ) {
            foreach ( $this->loadShard( $i ) as $item ) {
                // Basic validation from loaded binary data
                if ( ! isset( $item['id'], $item['vector'], $item['magnitude'], $item['dimension'] ) || ! is_array( $item['vector'] ) ) {
                    trigger_error( "Skipping invalid item in shard {$i} after binary load.", E_USER_WARNING );
                    continue;
                }
                if ( $filterFn !== null && ! $filterFn( $item['metadata'] ?? null ) ) {
                    continue;
                }
                if ( $item['dimension'] !== $queryDimension ) {
                    trigger_error( "Dimension mismatch ID '{$item['id']}' (stored {$item['dimension']}, query {$queryDimension}). Skipping.", E_USER_WARNING );
                    continue;
                }
                $storedMagnitude = (float) $item['magnitude'];
                if ( $storedMagnitude < self::FLOAT_EPSILON ) {
                    continue; // Zero vectors have no direction to compare
                }

                $similarity = $this->dotProduct( $normalizedQueryVector, $item['vector'] );
                // Vectors stored normalized already have unit magnitude; skip the division for them.
                if ( ! $this->normalizeVectors || abs( $storedMagnitude - 1.0 ) >= self::FLOAT_EPSILON ) {
                    $similarity /= $storedMagnitude;
                }
                if ( is_nan( $similarity ) ) {
                    continue; // Corrupt stored data; NaN scores would also break the sort below
                }

                $allResults[] = array(
                    'id'       => $item['id'],
                    'metadata' => $item['metadata'] ?? null,
                    'score'    => max( - 1.0, min( 1.0, $similarity ) ), // Clamp float rounding drift
                );
            }
        }

        usort( $allResults, fn( $a, $b ) => $b['score'] <=> $a['score'] );

        return array_slice( $allResults, 0, $topK );
    }

    // --- Helper Functions (normalizeVector, dotProduct, magnitude) ---

    /**
     * Scales a vector to unit length.
     *
     * @param array      $vec       Numeric vector.
     * @param float|null $magnitude Pre-computed magnitude of $vec, or null to compute it here.
     *
     * @return array Unit-length vector, or an all-zero vector when the magnitude is (near) zero.
     */
    private function normalizeVector( array $vec, ?float $magnitude = null ): array {
        if ( $magnitude === null ) {
            $magnitude = $this->magnitude( $vec );
        }
        if ( $magnitude < self::FLOAT_EPSILON ) {
            return array_fill( 0, count( $vec ), 0.0 );
        }
        $normalized = array();
        foreach ( $vec as $value ) {
            $normalized[] = (float) $value / $magnitude;
        }

        return $normalized;
    }

    /**
     * Computes the dot product over the indexes of $vec1.
     *
     * @return float The dot product, or NAN when an index is missing from $vec2 or a value is not numeric.
     */
    private function dotProduct( array $vec1, array $vec2 ): float {
        $result = 0.0;
        $count  = count( $vec1 );
        for ( $i = 0; $i < $count; $i ++ ) {
            if ( ! isset( $vec1[ $i ], $vec2[ $i ] ) || ! is_numeric( $vec1[ $i ] ) || ! is_numeric( $vec2[ $i ] ) ) {
                return NAN;
            }
            $result += (float) $vec1[ $i ] * (float) $vec2[ $i ];
        }

        return $result;
    }

    /**
     * Computes the Euclidean length of a vector.
     *
     * @return float The magnitude, or NAN when a value is not numeric.
     */
    private function magnitude( array $vec ): float {
        $sumOfSquares = 0.0;
        foreach ( $vec as $value ) {
            if ( ! is_numeric( $value ) ) {
                return NAN;
            }
            $sumOfSquares += (float) $value * (float) $value;
        }

        return sqrt( max( 0.0, $sumOfSquares ) );
    }

    // --- Management Functions (getAllVectors, clearAll) ---

    /**
     * Returns every stored item from all shards. Loads the whole store into memory.
     *
     * @return array List of items as produced by loadShard().
     * @throws \RuntimeException If a shard cannot be read.
     */
    public function getAllVectors(): array {
        $allData = array();
        for ( $i = 0; $i < $this->numShards; $i ++ ) {
            foreach ( $this->loadShard( $i ) as $item ) {
                $allData[] = $item;
            }
        }

        return $allData;
    }

    /**
     * Deletes all shard files (and leftover temp files) for the configured shard count.
     *
     * @return bool True when every existing shard file was deleted; false if any deletion failed.
     */
    public function clearAll(): bool {
        $success = true;
        for ( $i = 0; $i < $this->numShards; $i ++ ) {
            $filePath = $this->getShardPath( $i );
            if ( file_exists( $filePath ) && ! unlink( $filePath ) ) {
                trigger_error( "Failed to delete shard file: {$filePath}", E_USER_WARNING );
                $success = false;
            }
            // Temp files left behind by interrupted writes; removal is best-effort.
            $tempPattern = $this->storageDir . '/' . self::SHARD_PREFIX . $i . '_temp_*' . self::TEMP_SUFFIX;
            $tempFiles   = glob( $tempPattern );
            foreach ( ( $tempFiles === false ? array() : $tempFiles ) as $tempFile ) { // glob() returns false on error
                @unlink( $tempFile );
            }
        }
        if ( ! is_dir( $this->storageDir ) ) {
            @mkdir( $this->storageDir, 0777, true );
        }

        return $success;
    }
}
// --- Example Usage ---
// Demonstrates single adds, a batch add (including duplicate detection),
// similarity search and lookup by ID against a small on-disk store.
$storageDirectory = './vector_store_binary_shards';
$numberOfShards   = 3;
$normalizeOnAdd   = true;

$vectorStore = new SimpleShardedVectorStoreBinary( $storageDirectory, $numberOfShards, $normalizeOnAdd );
echo "Using Binary Store. Normalization on Add: " . ( $normalizeOnAdd ? 'Enabled' : 'Disabled' ) . "\n";

// Optional: Clear previous data
$vectorStore->clearAll();
echo "Store cleared.\n";

$vectorsToAdd = array(
    array( 'id' => 'cat1_bin', 'vector' => array( 0.1, 0.8, 0.1, 0.5 ), 'metadata' => array( 'topic' => 'animal', 'source' => 'A' ) ),
    array( 'id' => 'dog1_bin', 'vector' => array( 0.9, 0.1, 0.1, 0.2 ), 'metadata' => array( 'topic' => 'animal', 'source' => 'B' ) ),
    array( 'id' => 'sky1_bin', 'vector' => array( 0.1, 0.1, 0.9, 0.8 ), 'metadata' => array( 'topic' => 'weather', 'source' => 'C' ) ),
);

$vectorsToAddMore = array( // For batch add
    array( 'id' => 'cat2_bin', 'vector' => array( 0.2, 0.7, 0.0, 0.6 ), 'metadata' => array( 'topic' => 'animal', 'source' => 'A' ) ),
    // new source
    array( 'id' => 'dog2_bin', 'vector' => array( 0.8, 0.2, 0.0, 0.3 ), 'metadata' => array( 'topic' => 'animal', 'source' => 'D' ) ),
    array( 'id' => 'sky2_bin', 'vector' => array( 0.0, 0.2, 0.8, 0.7 ), 'metadata' => array( 'topic' => 'weather', 'source' => 'A' ) ),
    // Re-uses the ID 'cat1_bin' added individually below, so the batch reports it under 'duplicate_ids'.
    array( 'id' => 'cat1_bin', 'vector' => array( 0.1, 0.1, 0.1, 0.1 ), 'metadata' => array( 'topic' => 'duplicate_test' ) ),
);

echo "\nAdding vectors individually...\n";
foreach ( $vectorsToAdd as $v ) {
    if ( $vectorStore->addVector( $v['id'], $v['vector'], $v['metadata'] ) ) {
        echo " - Added ID: {$v['id']}\n";
    } else {
        echo " - Failed or duplicate ID: {$v['id']}\n";
    }
}

echo "\nAdding vectors in batch...\n";
// The batch holds three new IDs plus one ID that already exists in the store.
$batchResult = $vectorStore->addVectorsBatch( $vectorsToAddMore );
echo "Batch Add Results:\n";
print_r( $batchResult );

// Test search
$queryVector = array( 0.15, 0.75, 0.05, 0.55 ); // 4D query
echo "\nSearching for vectors similar to: [" . implode( ', ', $queryVector ) . "]\n";
$similar = $vectorStore->findSimilar( $queryVector, 3 );
print_r( $similar );

// Test get by ID
$item = $vectorStore->getVectorById( 'dog1_bin' );
echo "\nRetrieved 'dog1_bin':\n";
print_r( $item );

// Print only the count: dumping every vector is very verbose on large stores.
echo "\nTotal vectors stored: " . count( $vectorStore->getAllVectors() ) . "\n";

echo "\nDone.\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment