Практическое сравнение векторных БД Pinecone, Weaviate и Qdrant: установка, индексация, поиск и бенчмарк.
import time

import numpy as np
from openai import OpenAI

client = OpenAI()

EMBED_MODEL = "text-embedding-3-small"
DIM = 1536  # vector size produced by text-embedding-3-small
N_EMBEDDED = 200  # number of documents embedded for the comparison

# Generate the synthetic test documents.
documents = [
    f"Document {i}: This is about AI agents and RAG systems with vector search."
    for i in range(1000)
]


def embed(texts):
    """Return one embedding vector per entry of ``texts``.

    Args:
        texts: Strings to embed with ``EMBED_MODEL``.

    Returns:
        A list of embedding vectors aligned with ``texts``; an empty list
        when ``texts`` is empty.
    """
    # Nothing to embed: skip the network round-trip instead of sending an
    # empty request to the embeddings endpoint.
    if not texts:
        return []
    resp = client.embeddings.create(model=EMBED_MODEL, input=texts)
    # Every returned item carries the index of the input it belongs to;
    # ordering by it keeps the output aligned with ``texts``.
    ordered = sorted(resp.data, key=lambda item: item.index)
    return [item.embedding for item in ordered]


# Embed only the first N_EMBEDDED documents for the comparison.
embeddings = embed(documents[:N_EMBEDDED])
import os

from pinecone import Pinecone, ServerlessSpec

# Read the API key from the environment so real credentials never have to be
# pasted into the source; the original placeholder stays as the fallback.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY", "YOUR_API_KEY"))
index_name = "benchmark-pinecone"

# Create the serverless index only when it does not exist yet.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=DIM,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

pinecone_index = pc.Index(index_name)

# Indexing. perf_counter() is used because it is a monotonic,
# high-resolution clock meant for measuring short intervals.
start = time.perf_counter()
# Pair each embedding with its document instead of relying on a hard-coded
# count; zip() stops at the shorter sequence (the embedded subset).
vectors = [
    (f"doc-{i}", vector, {"text": text})
    for i, (text, vector) in enumerate(zip(documents, embeddings))
]
pinecone_index.upsert(vectors=vectors)
print(f"Pinecone upsert {len(vectors)} docs: {time.perf_counter() - start:.2f}s")

# Search. The query embedding is computed before the timer starts so that only
# the vector-database round-trip is measured.
query_vec = embed(["AI agent vector search"])[0]
start = time.perf_counter()
results = pinecone_index.query(vector=query_vec, top_k=5, include_metadata=True)
pinecone_latency = time.perf_counter() - start
print(f"Pinecone search: {pinecone_latency*1000:.1f}ms")
import weaviate
from weaviate.classes.config import Property, DataType, Configure

# Needs a local Weaviate with both the HTTP and gRPC ports published, e.g.
#   docker run -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:<version>
wv = weaviate.connect_to_local()
try:
    # Start from a clean collection so repeated runs are comparable.
    if wv.collections.exists("Benchmark"):
        wv.collections.delete("Benchmark")

    collection = wv.collections.create(
        name="Benchmark",
        properties=[Property(name="text", data_type=DataType.TEXT)],
        # Vectors are supplied by the script, so no server-side vectorizer.
        vectorizer_config=Configure.Vectorizer.none(),
    )

    # Pair each document with its embedding up front; zip() stops at the
    # shorter sequence (the embedded subset).
    pairs = list(zip(documents, embeddings))

    # Batched indexing, timed with a monotonic high-resolution clock.
    start = time.perf_counter()
    with collection.batch.dynamic() as batch:
        for text, vector in pairs:
            batch.add_object(
                properties={"text": text},
                vector=vector,
            )
    print(f"Weaviate upsert {len(pairs)} docs: {time.perf_counter() - start:.2f}s")

    # Batch imports do not raise per object; report failures explicitly so a
    # partially filled collection does not go unnoticed.
    failed = collection.batch.failed_objects
    if failed:
        print(f"Weaviate: {len(failed)} objects failed to import, first: {failed[0]}")

    # Search
    start = time.perf_counter()
    results = collection.query.near_vector(
        near_vector=query_vec,
        limit=5,
        return_properties=["text"],
    )
    weaviate_latency = time.perf_counter() - start
    print(f"Weaviate search: {weaviate_latency*1000:.1f}ms")
finally:
    # Always release the client's connections, even if a step above fails.
    wv.close()
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

qdrant = QdrantClient(host="localhost", port=6333)
collection_name = "benchmark_qdrant"

# Recreate the collection so repeated runs start from the same state.
if qdrant.collection_exists(collection_name):
    qdrant.delete_collection(collection_name)

qdrant.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=DIM, distance=Distance.COSINE),
)

# Indexing, timed with a monotonic high-resolution clock.
start = time.perf_counter()
# Pair each embedding with its document instead of relying on a hard-coded
# count; zip() stops at the shorter sequence (the embedded subset).
points = [
    PointStruct(id=i, vector=vector, payload={"text": text})
    for i, (text, vector) in enumerate(zip(documents, embeddings))
]
qdrant.upsert(collection_name=collection_name, points=points)
print(f"Qdrant upsert {len(points)} docs: {time.perf_counter() - start:.2f}s")

# Search. `search` is deprecated in qdrant-client in favour of `query_points`;
# `.points` keeps `results` a list of scored points as before.
start = time.perf_counter()
results = qdrant.query_points(
    collection_name=collection_name,
    query=query_vec,
    limit=5,
).points
qdrant_latency = time.perf_counter() - start
print(f"Qdrant search: {qdrant_latency*1000:.1f}ms")
# Feature matrix: each database maps to a tuple of values, one per column
# name listed in _COLUMNS (same order).
_COLUMNS = ("type", "hosting", "filter", "scale", "best_for")
_ROWS = {
    "Pinecone": (
        "Serverless SaaS",
        "Cloud only",
        "Metadata + namespace",
        "Billions",
        "Zero-ops production RAG",
    ),
    "Weaviate": (
        "OSS + Cloud",
        "Self-host / Cloud",
        "GraphQL + BM25 hybrid",
        "Millions",
        "Hybrid search + GraphQL APIs",
    ),
    "Qdrant": (
        "OSS (Rust)",
        "Self-host / Cloud",
        "Rich payload filters",
        "Millions",
        "Low-latency on-prem, advanced filtering",
    ),
}
comparison = {name: dict(zip(_COLUMNS, values)) for name, values in _ROWS.items()}


def pick_db(requirements):
    """Return a one-line database recommendation for ``requirements``.

    Rules are checked in order and the first match wins:
    "zero-ops" together with "scale" selects Pinecone; "hybrid" or
    "graphql" selects Weaviate; "on-prem" or "latency" selects Qdrant.
    When nothing matches, a generic recommendation is returned.

    Args:
        requirements: Container of requirement tags, tested with ``in``.

    Returns:
        The recommendation as a string.
    """

    def wants(tag):
        return tag in requirements

    if wants("zero-ops") and wants("scale"):
        return "Pinecone — serverless, auto-scale"
    if wants("hybrid") or wants("graphql"):
        return "Weaviate — hybrid BM25 + vector"
    if wants("on-prem") or wants("latency"):
        return "Qdrant — Rust, fastest local"
    return "Qdrant for local dev, Pinecone for prod scale"


# Print the summary table followed by a sample recommendation.
print("\n=== COMPARISON ===")
for name, row in comparison.items():
    print(name.ljust(10), row["type"].ljust(20), row["best_for"], sep=" | ")
print("\nBest for on-prem + low latency:", pick_db(["on-prem", "latency"]))