• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neuml / txtai / 11062719915

27 Sep 2024 01:06AM UTC coverage: 99.946%. Remained the same
11062719915

push

github

davidmezzetti
Update documentation

7406 of 7410 relevant lines covered (99.95%)

1.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

100.0
/src/python/txtai/ann/pgvector.py
1
"""
2
PGVector module
3
"""
4

5
import os
1✔
6

7
# Conditional import
8
try:
1✔
9
    from pgvector.sqlalchemy import Vector
1✔
10

11
    from sqlalchemy import create_engine, delete, text, Column, Index, Integer, MetaData, StaticPool, Table
1✔
12
    from sqlalchemy.orm import Session
1✔
13

14
    PGVECTOR = True
1✔
15
except ImportError:
1✔
16
    PGVECTOR = False
1✔
17

18
from .base import ANN
1✔
19

20

21
class PGVector(ANN):
    """
    Builds an ANN index backed by a Postgres database.

    Vectors are stored in a single table with an integer "indexid" primary key and an "embedding"
    vector column. An HNSW index using inner product operators is created over the embedding column.
    """

    def __init__(self, config):
        """
        Creates the database engine and session and enables the pgvector extension.

        Args:
            config: index configuration

        Raises:
            ImportError: if the optional pgvector/SQLAlchemy imports failed
        """

        super().__init__(config)

        if not PGVECTOR:
            raise ImportError('PGVector is not available - install "ann" extra to enable')

        # Create engine - url is read from the "url" setting, falling back to the ANN_URL environment variable
        self.engine = create_engine(self.setting("url", os.environ.get("ANN_URL")), poolclass=StaticPool, echo=False)

        # Initialize pgvector extension - non-Postgres dialects run a no-op statement instead
        self.database = Session(self.engine)
        self.database.execute(text("CREATE EXTENSION IF NOT EXISTS vector" if self.engine.dialect.name == "postgresql" else "SELECT 1"))
        self.database.commit()

        # Table instance - set by initialize()
        self.table = None

    def load(self, path):
        """
        Loads the index. Data lives in the database, so this discards uncommitted changes
        and binds to the existing table.

        Args:
            path: not used by this method
        """

        # Reset database to original checkpoint
        self.database.rollback()

        # Initialize tables
        self.initialize()

    def index(self, embeddings):
        """
        Builds a new index, dropping and recreating the table first.

        Args:
            embeddings: rows to index, ids are assigned by row position starting at 0.
                        Must expose shape[0] - presumably a NumPy array, confirm against callers.
        """

        # Initialize tables
        self.initialize(recreate=True)

        self._insert(embeddings, 0)

        # Add id offset and index build metadata
        self.config["offset"] = embeddings.shape[0]
        self.metadata(self.settings())

    def append(self, embeddings):
        """
        Appends rows to an existing index. Ids continue from the stored "offset".

        Args:
            embeddings: rows to append, must expose shape[0]
        """

        self._insert(embeddings, self.config["offset"])

        # Update id offset and index metadata
        self.config["offset"] += embeddings.shape[0]
        self.metadata()

    def delete(self, ids):
        """
        Deletes rows with matching index ids.

        Args:
            ids: index ids to delete
        """

        # delete() here resolves to the module-level SQLAlchemy import, not this method
        self.database.execute(delete(self.table).where(self.table.c["indexid"].in_(ids)))

    def search(self, queries, limit):
        """
        Runs a nearest neighbor search for each query.

        Args:
            queries: iterable of query vectors
            limit: maximum number of results per query

        Returns:
            list with one entry per query, each a list of (indexid, score) tuples
        """

        indexid, embedding = self.table.c["indexid"], self.table.c["embedding"]

        results = []
        for query in queries:
            # Run query - kept in a separate name so the query vector is not overwritten
            rows = self.database.query(indexid, embedding.max_inner_product(query).label("score")).order_by("score").limit(limit)

            # pgvector returns negative inner product since Postgres only supports ASC order index scans on operators
            results.append([(uid, -score) for uid, score in rows])

        return results

    def count(self):
        """
        Counts the rows in the vectors table.

        Returns:
            number of rows
        """

        return self.database.query(self.table.c["indexid"]).count()

    def save(self, path):
        """
        Commits pending changes to the database.

        Args:
            path: not used by this method
        """

        self.database.commit()

    def close(self):
        """
        Closes the database session and releases the engine's pooled connection.
        """

        # Parent logic
        super().close()

        # Close database connection. The engine is disposed as well since closing the session alone
        # only hands the connection back to the pool, which would otherwise keep it open.
        try:
            self.database.close()
        finally:
            self.engine.dispose()

    def initialize(self, recreate=False):
        """
        Initializes a new database session.

        Args:
            recreate: Recreates the database tables if True
        """

        # Table name
        table = self.setting("table", "vectors")

        # Create vectors table
        self.table = Table(
            table,
            MetaData(),
            Column("indexid", Integer, primary_key=True, autoincrement=False),
            Column("embedding", Vector(self.config["dimensions"])),
        )

        # Create ANN index - inner product is equal to cosine similarity on normalized vectors
        index = Index(
            f"{table}-index",
            self.table.c["embedding"],
            postgresql_using="hnsw",
            postgresql_with=self.settings(),
            postgresql_ops={"embedding": "vector_ip_ops"},
        )

        # Drop and recreate table
        if recreate:
            self.table.drop(self.engine, checkfirst=True)
            index.drop(self.engine, checkfirst=True)

        # Create table and index
        self.table.create(self.engine, checkfirst=True)
        index.create(self.engine, checkfirst=True)

    def settings(self):
        """
        Returns settings for this index.

        Returns:
            dict
        """

        return {"m": self.setting("m", 16), "ef_construction": self.setting("efconstruction", 200)}

    def _insert(self, embeddings, start):
        """
        Inserts embeddings rows with sequential index ids. Shared by index and append.

        Args:
            embeddings: rows to insert
            start: index id assigned to the first row
        """

        rows = [{"indexid": start + x, "embedding": row} for x, row in enumerate(embeddings)]

        # Skip the INSERT statement when there are no rows to write
        if rows:
            self.database.execute(self.table.insert(), rows)
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc