Skip to content

Vector

Embar supports pgvector, the open-source vector similarity search extension for PostgreSQL.

Before using vector columns, you must install and activate the extension:

CREATE EXTENSION vector;

Creating a Vector Column

Use Vector to store embeddings with a fixed dimension:

import asyncio
import psycopg

from embar.column.common import Integer
from embar.column.pg import Vector
from embar.db.pg import AsyncPgDb
from embar.table import Table

class Document(Table):
    id: Integer = Integer()
    embedding: Vector = Vector(3)  # 3-dimensional vector

async def get_db():
    database_url = "postgres://pg:pw@localhost:25432/db"
    conn = await psycopg.AsyncConnection.connect(database_url)
    await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
    db = AsyncPgDb(conn)
    await db.migrate([Document])
    return db

async def setup():
    db = await get_db()
    # Insert some documents with embeddings
    await db.insert(Document).values(Document(id=1, embedding=[1.0, 0.0, 0.0]))
    await db.insert(Document).values(Document(id=2, embedding=[0.0, 1.0, 0.0]))
    await db.insert(Document).values(Document(id=3, embedding=[0.0, 0.0, 1.0]))

asyncio.run(setup())

L2 Distance

Use L2Distance for Euclidean distance searches. This uses the <-> operator.

Order By L2 Distance

Find documents ordered by distance to a query vector:

from embar.query.vector import L2Distance

async def order_by_l2():
    db = await get_db()
    query_vector = [1.0, 0.5, 0.0]
    docs = await (
        db.select(Document.all())
        .from_(Document)
        .order_by(L2Distance(Document.embedding, query_vector))
    )
    print([d.id for d in docs])

asyncio.run(order_by_l2())

Filter By L2 Distance

Find documents within a distance threshold:

from embar.query.where import Lt

async def filter_by_l2():
    db = await get_db()
    query_vector = [1.0, 0.0, 0.0]
    docs = await (
        db.select(Document.all())
        .from_(Document)
        .where(Lt(L2Distance(Document.embedding, query_vector), 0.5))
    )
    print([d.id for d in docs])

asyncio.run(filter_by_l2())

Cosine Distance

Use CosineDistance for cosine similarity searches. This uses the <=> operator.

Order By Cosine Distance

from embar.query.vector import CosineDistance

async def order_by_cosine():
    db = await get_db()
    query_vector = [1.0, 0.5, 0.0]
    docs = await (
        db.select(Document.all())
        .from_(Document)
        .order_by(CosineDistance(Document.embedding, query_vector))
    )
    print([d.id for d in docs])

asyncio.run(order_by_cosine())