jacobsapps · June 2, 2025 16:35
diff --git a/gistfile1.txt b/gistfile1.txt
 # Encode every category label and collect its unit-length embedding.
 # A failure is reported for that label and the label is skipped, so one bad
 # label does not abort the whole export.
 for text in categories:
     try:
         # Tokenize text using CLIP tokenizer (padded/truncated to 77 tokens)
         encoded = tokenizer(
             text,
             return_tensors="np",
             padding="max_length",
             truncation=True,
             max_length=77,
         )
         input_ids = encoded["input_ids"].astype("int32")

         # CoreML model expects the tokens under "text"
         output = model.predict({"text": input_ids})
         print(f"✅ Output keys for '{text}': {output.keys()}")

         # The first output of the model is taken as the embedding
         embedding_key = next(iter(output))
         embedding = np.asarray(output[embedding_key], dtype=np.float64).ravel()

         # Dividing by a zero or non-finite norm would yield NaN/Inf values,
         # which json.dump writes as tokens that are not valid JSON.
         norm = np.linalg.norm(embedding)
         if not np.isfinite(norm) or norm == 0:
             raise ValueError(f"cannot normalize embedding with norm {norm}")
         normalized = (embedding / norm).tolist()
     except Exception as e:
         # Deliberate best-effort boundary: report and move on to the next label
         print(f"⚠️ Failed to process '{text}': {e}")
     else:
         # Save normalized embedding with label
         entries.append({
             "label": text,
             "embedding": normalized,
         })

 # Write all collected entries as one JSON array
 output_filename = "text_embeddings.json"
 with open(output_filename, "w", encoding="utf-8") as f:
     json.dump(entries, f, indent=2)

 print(f"✅ Saved embeddings to {output_filename}")
	# Encode every category label and collect its unit-length embedding.
	# A failure is reported for that label and the label is skipped, so one bad
	# label does not abort the whole export.
	for text in categories:
		try:
			# Tokenize text using CLIP tokenizer (padded/truncated to 77 tokens)
			encoded = tokenizer(
				text,
				return_tensors="np",
				padding="max_length",
				truncation=True,
				max_length=77,
			)
			input_ids = encoded["input_ids"].astype("int32")

			# CoreML model expects the tokens under "text"
			output = model.predict({"text": input_ids})
			print(f"✅ Output keys for '{text}': {output.keys()}")

			# The first output of the model is taken as the embedding
			embedding_key = next(iter(output))
			embedding = np.asarray(output[embedding_key], dtype=np.float64).ravel()

			# Dividing by a zero or non-finite norm would yield NaN/Inf values,
			# which json.dump writes as tokens that are not valid JSON.
			norm = np.linalg.norm(embedding)
			if not np.isfinite(norm) or norm == 0:
				raise ValueError(f"cannot normalize embedding with norm {norm}")
			normalized = (embedding / norm).tolist()
		except Exception as e:
			# Deliberate best-effort boundary: report and move on to the next label
			print(f"⚠️ Failed to process '{text}': {e}")
		else:
			# Save normalized embedding with label
			entries.append({
				"label": text,
				"embedding": normalized,
			})

	# Write all collected entries as one JSON array
	output_filename = "text_embeddings.json"
	with open(output_filename, "w", encoding="utf-8") as f:
		json.dump(entries, f, indent=2)

	print(f"✅ Saved embeddings to {output_filename}")