• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

joaoh82 / rust_sqlite / 26391649718

25 May 2026 08:39AM UTC coverage: 68.845%. Remained the same
26391649718

push

github

joaoh82
Merge branch 'main' of https://github.com/joaoh82/rust_sqlite

101 of 108 new or added lines in 5 files covered. (93.52%)

376 existing lines in 5 files now uncovered.

11190 of 16254 relevant lines covered (68.84%)

1.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

66.87
/src/sql/db/table.rs
1
use crate::error::{Result, SQLRiteError};
2
use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
3
use crate::sql::fts::PostingList;
4
use crate::sql::hnsw::{DistanceMetric, HnswIndex};
5
use crate::sql::parser::create::{CreateQuery, ParsedColumn};
6
use std::collections::{BTreeMap, HashMap};
7
use std::fmt;
8
use std::sync::{Arc, Mutex};
9

10
use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
11

12
/// SQLRite data types.
///
/// Modelled on SQLite's storage classes and type affinities; see
/// [Datatypes In SQLite Version 3](https://www.sqlite.org/datatype3.html).
///
/// `Vector(dim)` is the Phase 7a addition — a fixed-dimension dense f32
/// array. The dimension is part of the type so a `VECTOR(384)` column
/// rejects `[0.1, 0.2, 0.3]` at INSERT time as a clean type error
/// rather than silently storing the wrong shape.
#[derive(PartialEq, Debug, Clone)]
pub enum DataType {
    Integer,
    Text,
    Real,
    Bool,
    /// Dense f32 vector of fixed dimension. The `usize` is the column's
    /// declared dimension; every value stored in the column must have
    /// exactly that many elements.
    Vector(usize),
    /// Phase 7e — JSON column. Stored as canonical UTF-8 text (matches
    /// SQLite's JSON1 extension), validated at INSERT time. The
    /// `json_extract` family of functions parses on demand and returns
    /// either a primitive `Value` (Integer / Real / Text / Bool / Null)
    /// or a Text value carrying the JSON-encoded sub-object/array.
    /// Q3 originally specified `bincoded serde_json::Value`, but bincode
    /// was removed from the engine in Phase 3c — see the scope-correction
    /// note in `docs/phase-7-plan.md` for the rationale on switching to
    /// text storage.
    Json,
    None,
    Invalid,
}

impl DataType {
    /// Constructs a `DataType` from the wire string the parser produces.
    ///
    /// Matching is case-insensitive. Accepted spellings are `"integer"`,
    /// `"text"`, `"real"`, `"bool"`, `"json"`, `"none"` and, since
    /// Phase 7a, `"vector(N)"` where `N` is a positive integer
    /// (whitespace around `N` is tolerated). The vector dimension is
    /// encoded in-band so a richer type does not have to be plumbed
    /// through the string-based `ParsedColumn` pipeline.
    ///
    /// Anything else — including a `vector(...)` whose dimension is
    /// missing, zero or not a number — yields `DataType::Invalid` and
    /// writes a diagnostic to stderr.
    pub fn new(cmd: String) -> DataType {
        let lower = cmd.to_lowercase();
        match lower.as_str() {
            "integer" => DataType::Integer,
            "text" => DataType::Text,
            "real" => DataType::Real,
            "bool" => DataType::Bool,
            "json" => DataType::Json,
            "none" => DataType::None,
            other => {
                // `Some(inside)` only when the string is wrapped as
                // `vector(` … `)`; `inside` is the text between them.
                let dimension = other
                    .strip_prefix("vector(")
                    .and_then(|rest| rest.strip_suffix(')'));
                match dimension {
                    Some(inside) => match inside.trim().parse::<usize>() {
                        Ok(dim) if dim > 0 => DataType::Vector(dim),
                        _ => {
                            eprintln!("Invalid VECTOR dimension in {cmd}");
                            DataType::Invalid
                        }
                    },
                    None => {
                        eprintln!("Invalid data type given {}", cmd);
                        DataType::Invalid
                    }
                }
            }
        }
    }

    /// Inverse of `new` — returns the canonical lowercased wire string
    /// for this DataType. Used by the parser to round-trip
    /// `VECTOR(N)` → `DataType::Vector(N)` → `"vector(N)"` into
    /// `ParsedColumn::datatype` so the rest of the pipeline keeps
    /// working with strings.
    ///
    /// Every variant is emitted in lowercase, so for all variants except
    /// `Invalid` the result is exactly one of the spellings `new`
    /// matches. `Invalid` has no accepted spelling; feeding `"invalid"`
    /// back into `new` lands in its fallback arm and produces `Invalid`
    /// again (with a stderr diagnostic).
    pub fn to_wire_string(&self) -> String {
        match self {
            DataType::Integer => "integer".to_string(),
            DataType::Text => "text".to_string(),
            DataType::Real => "real".to_string(),
            DataType::Bool => "bool".to_string(),
            DataType::Vector(dim) => format!("vector({dim})"),
            DataType::Json => "json".to_string(),
            DataType::None => "none".to_string(),
            DataType::Invalid => "invalid".to_string(),
        }
    }
}

/// Human-readable type name. This is distinct from the wire format of
/// `to_wire_string`: names are capitalized and `Bool` renders as
/// `"Boolean"`.
impl fmt::Display for DataType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            DataType::Integer => f.write_str("Integer"),
            DataType::Text => f.write_str("Text"),
            DataType::Real => f.write_str("Real"),
            DataType::Bool => f.write_str("Boolean"),
            DataType::Vector(dim) => write!(f, "Vector({dim})"),
            DataType::Json => f.write_str("Json"),
            DataType::None => f.write_str("None"),
            DataType::Invalid => f.write_str("Invalid"),
        }
    }
}
113

114
/// The schema for each SQL Table is represented in memory by
115
/// following structure.
116
///
117
/// `rows` is `Arc<Mutex<...>>` rather than `Rc<RefCell<...>>` so `Table`
118
/// (and by extension `Database`) is `Send + Sync` — the Tauri desktop
119
/// app holds the engine in shared state behind a `Mutex<Database>`, and
120
/// Tauri's state container requires its contents to be thread-safe.
121
#[derive(Debug)]
122
pub struct Table {
123
    /// Name of the table
124
    pub tb_name: String,
125
    /// Schema for each column, in declaration order.
126
    pub columns: Vec<Column>,
127
    /// Per-column row storage, keyed by column name. Every column's
128
    /// `Row::T(BTreeMap)` is keyed by rowid, so all columns share the same
129
    /// keyset after each write.
130
    pub rows: Arc<Mutex<HashMap<String, Row>>>,
131
    /// Secondary indexes on this table (Phase 3e). One auto-created entry
132
    /// per UNIQUE or PRIMARY KEY column; explicit `CREATE INDEX` statements
133
    /// add more. Looking up an index: iterate by column name, or by index
134
    /// name via `Table::index_by_name`.
135
    pub secondary_indexes: Vec<SecondaryIndex>,
136
    /// HNSW indexes on VECTOR columns (Phase 7d.2). Maintained in lockstep
137
    /// with row storage on INSERT (incremental); rebuilt on open from the
138
    /// persisted CREATE INDEX SQL. The graph itself is NOT yet persisted —
139
    /// see Phase 7d.3 for cell-encoded graph storage.
140
    pub hnsw_indexes: Vec<HnswIndexEntry>,
141
    /// FTS inverted indexes on TEXT columns (Phase 8b). Maintained in
142
    /// lockstep with row storage on INSERT (incremental); DELETE / UPDATE
143
    /// flag `needs_rebuild` and the next save rebuilds from current rows.
144
    /// The posting lists themselves are NOT yet persisted — Phase 8c
145
    /// wires the cell-encoded `KIND_FTS_POSTING` storage.
146
    pub fts_indexes: Vec<FtsIndexEntry>,
147
    /// ROWID of most recent insert.
148
    pub last_rowid: i64,
149
    /// PRIMARY KEY column name, or "-1" if the table has no PRIMARY KEY.
150
    pub primary_key: String,
151
}
152

153
/// One HNSW index attached to a table. The distance metric is fixed
154
/// at CREATE INDEX time via `USING hnsw (col) WITH (metric = '<m>')`
155
/// (`l2` / `cosine` / `dot`); omitting the WITH clause defaults to L2,
156
/// matching the pre-SQLR-28 behaviour for round-tripping older
157
/// `sqlrite_master` rows that didn't carry a metric.
158
#[derive(Debug, Clone)]
159
pub struct HnswIndexEntry {
160
    /// User-supplied name from `CREATE INDEX <name> …`. Unique across
161
    /// both `secondary_indexes` and `hnsw_indexes` on a given table.
162
    pub name: String,
163
    /// The VECTOR column this index covers.
164
    pub column_name: String,
165
    /// Distance metric the graph was built for. The optimizer's HNSW
166
    /// shortcut only fires when the query's `vec_distance_*` function
167
    /// matches this metric — picking a non-matching distance falls
168
    /// through to brute-force, since the graph topology is metric-
169
    /// specific (an L2-pruned graph isn't a valid cosine search graph
170
    /// in general, and vice versa).
171
    pub metric: DistanceMetric,
172
    /// The graph itself.
173
    pub index: HnswIndex,
174
    /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
175
    /// invalidated the graph since the last rebuild. INSERT maintains
176
    /// the graph incrementally and leaves this false. The next save
177
    /// rebuilds dirty indexes from current rows before serializing.
178
    pub needs_rebuild: bool,
179
}
180

181
/// One FTS index attached to a table (Phase 8b). The inverted index
182
/// itself is a [`PostingList`]; metadata (name, column, dirty flag)
183
/// lives here. Mirrors [`HnswIndexEntry`] field-for-field so the
184
/// rebuild-on-save and DELETE/UPDATE invalidation paths can use one
185
/// pattern across both index families.
186
#[derive(Debug, Clone)]
187
pub struct FtsIndexEntry {
188
    /// User-supplied name from `CREATE INDEX <name> … USING fts(<col>)`.
189
    /// Unique across `secondary_indexes`, `hnsw_indexes`, and
190
    /// `fts_indexes` on a given table.
191
    pub name: String,
192
    /// The TEXT column this index covers.
193
    pub column_name: String,
194
    /// The inverted index + per-doc length cache.
195
    pub index: PostingList,
196
    /// True iff a DELETE or UPDATE-on-text-col has invalidated the
197
    /// posting lists since the last rebuild. INSERT maintains the
198
    /// index incrementally and leaves this false. The next save
199
    /// rebuilds dirty indexes from current rows before serializing
200
    /// (mirrors HNSW's Q7 strategy).
201
    pub needs_rebuild: bool,
202
}
203

204
impl Table {
205
    pub fn new(create_query: CreateQuery) -> Self {
2✔
206
        let table_name = create_query.table_name;
2✔
207
        let mut primary_key: String = String::from("-1");
2✔
208
        let columns = create_query.columns;
2✔
209

210
        let mut table_cols: Vec<Column> = vec![];
2✔
211
        let table_rows: Arc<Mutex<HashMap<String, Row>>> = Arc::new(Mutex::new(HashMap::new()));
4✔
212
        let mut secondary_indexes: Vec<SecondaryIndex> = Vec::new();
2✔
213
        for col in &columns {
6✔
214
            let col_name = &col.name;
2✔
215
            if col.is_pk {
4✔
216
                primary_key = col_name.to_string();
2✔
217
            }
218
            table_cols.push(Column::with_default(
4✔
219
                col_name.to_string(),
4✔
220
                col.datatype.to_string(),
4✔
221
                col.is_pk,
2✔
222
                col.not_null,
2✔
223
                col.is_unique,
2✔
224
                col.default.clone(),
2✔
225
            ));
226

227
            let dt = DataType::new(col.datatype.to_string());
2✔
228
            let row_storage = match &dt {
2✔
229
                DataType::Integer => Row::Integer(BTreeMap::new()),
4✔
230
                DataType::Real => Row::Real(BTreeMap::new()),
2✔
231
                DataType::Text => Row::Text(BTreeMap::new()),
4✔
232
                DataType::Bool => Row::Bool(BTreeMap::new()),
2✔
233
                // The dimension is enforced at INSERT time against the
234
                // column's declared DataType::Vector(dim). The Row variant
235
                // itself doesn't carry the dim — every stored Vec<f32>
236
                // already has it via .len().
237
                DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
2✔
238
                // Phase 7e — JSON columns reuse Text storage (with
239
                // INSERT-time validation that the bytes parse as JSON).
240
                // No new Row variant; json_extract / json_type / etc.
241
                // re-parse from text on demand. See `docs/phase-7-plan.md`
242
                // Q3's scope-correction note for the storage choice.
243
                DataType::Json => Row::Text(BTreeMap::new()),
2✔
244
                DataType::Invalid | DataType::None => Row::None,
1✔
245
            };
246
            table_rows
4✔
247
                .lock()
248
                .expect("Table row storage mutex poisoned")
249
                .insert(col.name.to_string(), row_storage);
×
250

251
            // Auto-create an index for every UNIQUE / PRIMARY KEY column,
252
            // but only for types we know how to index. Real / Bool / Vector
253
            // UNIQUE columns fall back to the linear scan path in
254
            // validate_unique_constraint — same behavior as before 3e.
255
            // (Vector UNIQUE is unusual; the linear-scan path will work
256
            // via Value::Vector PartialEq, just at O(N) cost.)
257
            if (col.is_pk || col.is_unique) && matches!(dt, DataType::Integer | DataType::Text) {
2✔
258
                let name = SecondaryIndex::auto_name(&table_name, &col.name);
2✔
259
                match SecondaryIndex::new(
4✔
260
                    name,
2✔
261
                    table_name.clone(),
4✔
262
                    col.name.clone(),
2✔
263
                    &dt,
×
264
                    true,
×
265
                    IndexOrigin::Auto,
×
266
                ) {
267
                    Ok(idx) => secondary_indexes.push(idx),
4✔
268
                    Err(_) => {
×
269
                        // Unreachable given the matches! guard above, but
270
                        // the builder returns Result so we keep the arm.
271
                    }
272
                }
273
            }
274
        }
275

276
        Table {
277
            tb_name: table_name,
278
            columns: table_cols,
279
            rows: table_rows,
280
            secondary_indexes,
281
            // HNSW indexes only land via explicit CREATE INDEX … USING hnsw
282
            // statements (Phase 7d.2); never auto-created at CREATE TABLE
283
            // time, because there's no UNIQUE-style constraint that
284
            // implies a vector index.
285
            hnsw_indexes: Vec::new(),
2✔
286
            // Same story for FTS indexes — explicit `CREATE INDEX … USING
287
            // fts(<col>)` only (Phase 8b).
288
            fts_indexes: Vec::new(),
2✔
289
            last_rowid: 0,
290
            primary_key,
291
        }
292
    }
293

294
    /// Deep-clones a `Table` for transaction snapshots (Phase 4f).
295
    ///
296
    /// The normal `Clone` derive would shallow-clone the `Arc<Mutex<_>>`
297
    /// wrapping our row storage, leaving both copies sharing the same
298
    /// inner map — mutating the snapshot would corrupt the live table
299
    /// and vice versa. Instead we lock, clone the inner `HashMap`, and
300
    /// wrap it in a fresh `Arc<Mutex<_>>`. Columns and indexes derive
301
    /// `Clone` directly (all their fields are plain data).
302
    pub fn deep_clone(&self) -> Self {
1✔
303
        let cloned_rows: HashMap<String, Row> = {
1✔
304
            let guard = self.rows.lock().expect("row mutex poisoned");
1✔
305
            guard.clone()
2✔
306
        };
307
        Table {
308
            tb_name: self.tb_name.clone(),
1✔
309
            columns: self.columns.clone(),
1✔
310
            rows: Arc::new(Mutex::new(cloned_rows)),
2✔
311
            secondary_indexes: self.secondary_indexes.clone(),
1✔
312
            // HnswIndexEntry derives Clone, so the snapshot owns its own
313
            // graph copy. Phase 4f's snapshot-rollback semantics require
314
            // the snapshot to be fully decoupled from live state.
315
            hnsw_indexes: self.hnsw_indexes.clone(),
1✔
316
            // Same fully-decoupled clone for FTS indexes (Phase 8b).
317
            fts_indexes: self.fts_indexes.clone(),
1✔
318
            last_rowid: self.last_rowid,
1✔
319
            primary_key: self.primary_key.clone(),
1✔
320
        }
321
    }
322

323
    /// Finds an auto- or explicit-index entry for a given column. Returns
324
    /// `None` if the column isn't indexed.
325
    pub fn index_for_column(&self, column: &str) -> Option<&SecondaryIndex> {
2✔
326
        self.secondary_indexes
2✔
327
            .iter()
328
            .find(|i| i.column_name == column)
6✔
329
    }
330

331
    /// Mutable counterpart of `index_for_column`: returns the first
    /// secondary index whose `column_name` equals `column`, or `None`.
    fn index_for_column_mut(&mut self, column: &str) -> Option<&mut SecondaryIndex> {
        for idx in self.secondary_indexes.iter_mut() {
            if idx.column_name == column {
                return Some(idx);
            }
        }
        None
    }
336

337
    /// Finds a secondary index by its own name (e.g., `sqlrite_autoindex_users_email`
338
    /// or a user-provided CREATE INDEX name). Used by DROP INDEX and the
339
    /// rename helpers below.
340
    pub fn index_by_name(&self, name: &str) -> Option<&SecondaryIndex> {
1✔
341
        self.secondary_indexes.iter().find(|i| i.name == name)
3✔
342
    }
343

344
    /// Renames a column in place. Updates row storage, the `Column`
345
    /// metadata, every secondary / HNSW / FTS index whose `column_name`
346
    /// matches, the `primary_key` pointer if the renamed column is the
347
    /// PK, and any auto-index name that embedded the old column name.
348
    ///
349
    /// Caller-side validation (table existence, source-column existence
350
    /// at the surface level, IF EXISTS) lives in the executor; this
351
    /// method enforces the column-level invariants that have to be
352
    /// checked under the `Table` borrow anyway.
353
    pub fn rename_column(&mut self, old: &str, new: &str) -> Result<()> {
1✔
354
        if !self.columns.iter().any(|c| c.column_name == old) {
3✔
355
            return Err(SQLRiteError::General(format!(
×
356
                "column '{old}' does not exist in table '{}'",
×
357
                self.tb_name
×
358
            )));
359
        }
360
        if old != new && self.columns.iter().any(|c| c.column_name == new) {
4✔
361
            return Err(SQLRiteError::General(format!(
1✔
362
                "column '{new}' already exists in table '{}'",
×
363
                self.tb_name
×
364
            )));
365
        }
366
        if old == new {
1✔
367
            return Ok(());
×
368
        }
369

370
        for col in self.columns.iter_mut() {
2✔
371
            if col.column_name == old {
2✔
372
                col.column_name = new.to_string();
1✔
373
            }
374
        }
375

376
        // Re-key the per-column row map.
377
        {
378
            let mut rows = self.rows.lock().expect("rows mutex poisoned");
1✔
379
            if let Some(storage) = rows.remove(old) {
3✔
380
                rows.insert(new.to_string(), storage);
2✔
381
            }
382
        }
383

384
        if self.primary_key == old {
1✔
385
            self.primary_key = new.to_string();
×
386
        }
387

388
        let table_name = self.tb_name.clone();
1✔
389
        for idx in self.secondary_indexes.iter_mut() {
2✔
390
            if idx.column_name == old {
2✔
391
                idx.column_name = new.to_string();
1✔
392
                if idx.origin == IndexOrigin::Auto
2✔
393
                    && idx.name == SecondaryIndex::auto_name(&table_name, old)
1✔
394
                {
395
                    idx.name = SecondaryIndex::auto_name(&table_name, new);
1✔
396
                }
397
            }
398
        }
399
        for entry in self.hnsw_indexes.iter_mut() {
1✔
400
            if entry.column_name == old {
×
401
                entry.column_name = new.to_string();
×
402
            }
403
        }
404
        for entry in self.fts_indexes.iter_mut() {
1✔
405
            if entry.column_name == old {
×
406
                entry.column_name = new.to_string();
×
407
            }
408
        }
409

410
        Ok(())
1✔
411
    }
412

413
    /// Appends a new column to this table from a parsed column spec.
414
    /// The new column's row storage is allocated empty; existing rowids
415
    /// read NULL for the new column unless `parsed.default` is set, in
416
    /// which case those rowids are backfilled with the default value.
417
    ///
418
    /// Rejects PK / UNIQUE on the added column (would require
419
    /// backfill-with-uniqueness-check against existing rows). Rejects
420
    /// NOT NULL without DEFAULT on a non-empty table — same rule SQLite
421
    /// applies, and necessary because we have no other backfill source.
422
    pub fn add_column(&mut self, parsed: ParsedColumn) -> Result<()> {
1✔
423
        if self.contains_column(parsed.name.clone()) {
2✔
424
            return Err(SQLRiteError::General(format!(
2✔
425
                "column '{}' already exists in table '{}'",
×
426
                parsed.name, self.tb_name
×
427
            )));
428
        }
429
        if parsed.is_pk {
1✔
430
            return Err(SQLRiteError::General(
1✔
431
                "cannot ADD COLUMN with PRIMARY KEY constraint on existing table".to_string(),
1✔
432
            ));
433
        }
434
        if parsed.is_unique {
1✔
435
            return Err(SQLRiteError::General(
1✔
436
                "cannot ADD COLUMN with UNIQUE constraint on existing table".to_string(),
1✔
437
            ));
438
        }
439
        let table_has_rows = self
2✔
440
            .columns
×
441
            .first()
442
            .map(|c| {
2✔
443
                self.rows
3✔
444
                    .lock()
1✔
445
                    .expect("rows mutex poisoned")
1✔
446
                    .get(&c.column_name)
1✔
447
                    .map(|r| r.rowids().len())
3✔
448
                    .unwrap_or(0)
1✔
449
                    > 0
1✔
450
            })
451
            .unwrap_or(false);
452
        if parsed.not_null && parsed.default.is_none() && table_has_rows {
2✔
453
            return Err(SQLRiteError::General(format!(
2✔
454
                "cannot ADD COLUMN '{}' NOT NULL without DEFAULT to a non-empty table",
×
455
                parsed.name
×
456
            )));
457
        }
458

459
        let new_column = Column::with_default(
460
            parsed.name.clone(),
2✔
461
            parsed.datatype.clone(),
2✔
462
            parsed.is_pk,
1✔
463
            parsed.not_null,
1✔
464
            parsed.is_unique,
1✔
465
            parsed.default.clone(),
1✔
466
        );
467

468
        // Allocate empty row storage for the new column. Mirrors the
469
        // dispatch in `Table::new` so the new column behaves identically
470
        // to one declared at CREATE TABLE time.
471
        let row_storage = match &new_column.datatype {
1✔
472
            DataType::Integer => Row::Integer(BTreeMap::new()),
2✔
473
            DataType::Real => Row::Real(BTreeMap::new()),
×
474
            DataType::Text => Row::Text(BTreeMap::new()),
2✔
475
            DataType::Bool => Row::Bool(BTreeMap::new()),
×
476
            DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
×
477
            DataType::Json => Row::Text(BTreeMap::new()),
×
478
            DataType::Invalid | DataType::None => Row::None,
×
479
        };
480
        {
481
            let mut rows = self.rows.lock().expect("rows mutex poisoned");
2✔
482
            rows.insert(parsed.name.clone(), row_storage);
2✔
483
        }
484

485
        // Backfill existing rowids with the default value, if any.
486
        // NULL defaults are a no-op — a missing key in the BTreeMap reads
487
        // as NULL anyway. Type mismatches were caught at `parse_one_column`
488
        // time when the DEFAULT was evaluated against the declared
489
        // datatype; reaching the `_` arm here would indicate a bug.
490
        if let Some(default) = &parsed.default {
2✔
491
            let existing_rowids = self.rowids();
2✔
492
            let mut rows = self.rows.lock().expect("rows mutex poisoned");
2✔
493
            let storage = rows.get_mut(&parsed.name).expect("just inserted");
2✔
494
            match (storage, default) {
1✔
495
                (Row::Integer(tree), Value::Integer(v)) => {
1✔
496
                    let v32 = *v as i32;
1✔
497
                    for rowid in existing_rowids {
2✔
498
                        tree.insert(rowid, v32);
2✔
499
                    }
500
                }
501
                (Row::Real(tree), Value::Real(v)) => {
×
502
                    let v32 = *v as f32;
×
503
                    for rowid in existing_rowids {
×
504
                        tree.insert(rowid, v32);
×
505
                    }
506
                }
507
                (Row::Text(tree), Value::Text(v)) => {
1✔
508
                    for rowid in existing_rowids {
2✔
509
                        tree.insert(rowid, v.clone());
2✔
510
                    }
511
                }
512
                (Row::Bool(tree), Value::Bool(v)) => {
×
513
                    for rowid in existing_rowids {
×
514
                        tree.insert(rowid, *v);
×
515
                    }
516
                }
517
                (_, Value::Null) => {} // no-op
518
                (storage_ref, _) => {
×
519
                    return Err(SQLRiteError::Internal(format!(
×
520
                        "DEFAULT type does not match column storage for '{}': storage variant {:?}, default {:?}",
×
521
                        parsed.name,
×
522
                        std::mem::discriminant(storage_ref),
×
523
                        default
×
524
                    )));
525
                }
526
            }
527
        }
528

529
        self.columns.push(new_column);
1✔
530
        Ok(())
1✔
531
    }
532

533
    /// Removes a column from this table. Refuses to drop the PRIMARY KEY
534
    /// column or the only remaining column. Cascades to every index
535
    /// (auto, explicit, HNSW, FTS) that referenced the column.
536
    pub fn drop_column(&mut self, name: &str) -> Result<()> {
1✔
537
        if !self.contains_column(name.to_string()) {
1✔
538
            return Err(SQLRiteError::General(format!(
×
539
                "column '{name}' does not exist in table '{}'",
×
540
                self.tb_name
×
541
            )));
542
        }
543
        if self.primary_key == name {
1✔
544
            return Err(SQLRiteError::General(format!(
1✔
545
                "cannot drop primary key column '{name}'"
×
546
            )));
547
        }
548
        if self.columns.len() == 1 {
1✔
549
            return Err(SQLRiteError::General(format!(
1✔
550
                "cannot drop the only column of table '{}'",
×
551
                self.tb_name
×
552
            )));
553
        }
554

555
        self.columns.retain(|c| c.column_name != name);
3✔
556
        {
557
            let mut rows = self.rows.lock().expect("rows mutex poisoned");
1✔
558
            rows.remove(name);
2✔
559
        }
560
        self.secondary_indexes.retain(|i| i.column_name != name);
3✔
561
        self.hnsw_indexes.retain(|i| i.column_name != name);
1✔
562
        self.fts_indexes.retain(|i| i.column_name != name);
1✔
563

564
        Ok(())
1✔
565
    }
566

567
    /// Returns a `bool` informing if a `Column` with a specific name exists or not
568
    ///
569
    pub fn contains_column(&self, column: String) -> bool {
2✔
570
        self.columns.iter().any(|col| col.column_name == column)
8✔
571
    }
572

573
    /// Returns the list of column names in declaration order.
574
    pub fn column_names(&self) -> Vec<String> {
1✔
575
        self.columns.iter().map(|c| c.column_name.clone()).collect()
3✔
576
    }
577

578
    /// Returns all rowids currently stored in the table, in ascending order.
579
    /// Every column's BTreeMap has the same keyset, so we just read from the first column.
580
    pub fn rowids(&self) -> Vec<i64> {
2✔
581
        let Some(first) = self.columns.first() else {
2✔
582
            return vec![];
×
583
        };
584
        let rows = self.rows.lock().expect("rows mutex poisoned");
2✔
585
        rows.get(&first.column_name)
4✔
586
            .map(|r| r.rowids())
6✔
587
            .unwrap_or_default()
588
    }
589

590
    /// Reads a single cell at `(column, rowid)`.
591
    pub fn get_value(&self, column: &str, rowid: i64) -> Option<Value> {
2✔
592
        let rows = self.rows.lock().expect("rows mutex poisoned");
2✔
593
        rows.get(column).and_then(|r| r.get(rowid))
8✔
594
    }
595

596
    /// Removes the row identified by `rowid` from every column's storage and
597
    /// from every secondary index entry.
598
    pub fn delete_row(&mut self, rowid: i64) {
1✔
599
        // Snapshot the values we're about to delete so we can strip them
600
        // from secondary indexes by (value, rowid) before the row storage
601
        // disappears.
602
        let per_column_values: Vec<(String, Option<Value>)> = self
2✔
603
            .columns
×
604
            .iter()
605
            .map(|c| (c.column_name.clone(), self.get_value(&c.column_name, rowid)))
3✔
606
            .collect();
607

608
        // Remove from row storage.
609
        {
610
            let rows_clone = Arc::clone(&self.rows);
2✔
611
            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
2✔
612
            for col in &self.columns {
2✔
613
                if let Some(r) = row_data.get_mut(&col.column_name) {
2✔
614
                    match r {
1✔
615
                        Row::Integer(m) => {
1✔
616
                            m.remove(&rowid);
2✔
617
                        }
618
                        Row::Text(m) => {
1✔
619
                            m.remove(&rowid);
2✔
620
                        }
621
                        Row::Real(m) => {
×
622
                            m.remove(&rowid);
×
623
                        }
624
                        Row::Bool(m) => {
×
625
                            m.remove(&rowid);
×
626
                        }
627
                        Row::Vector(m) => {
1✔
628
                            m.remove(&rowid);
2✔
629
                        }
630
                        Row::None => {}
×
631
                    }
632
                }
633
            }
634
        }
635

636
        // Strip secondary-index entries. Non-indexed columns just don't
637
        // show up in secondary_indexes and are no-ops here.
638
        for (col_name, value) in per_column_values {
2✔
639
            if let Some(idx) = self.index_for_column_mut(&col_name) {
2✔
640
                if let Some(v) = value {
2✔
641
                    idx.remove(&v, rowid);
1✔
642
                }
643
            }
644
        }
645
    }
646

647
    /// Replays a single row at `rowid` when loading a table from disk. Takes
648
    /// one typed value per column (in declaration order); `None` means the
649
    /// stored cell carried a NULL for that column. Unlike `insert_row` this
650
    /// trusts the on-disk state and does *not* re-check UNIQUE — we're
651
    /// rebuilding a state that was already consistent when it was saved.
652
    pub fn restore_row(&mut self, rowid: i64, values: Vec<Option<Value>>) -> Result<()> {
2✔
653
        if values.len() != self.columns.len() {
4✔
654
            return Err(SQLRiteError::Internal(format!(
×
655
                "cell has {} values but table '{}' has {} columns",
×
656
                values.len(),
×
657
                self.tb_name,
×
658
                self.columns.len()
×
659
            )));
660
        }
661

662
        let column_names: Vec<String> =
8✔
663
            self.columns.iter().map(|c| c.column_name.clone()).collect();
×
664

665
        for (i, value) in values.into_iter().enumerate() {
6✔
666
            let col_name = &column_names[i];
4✔
667

668
            // Write into the per-column row storage first (scoped borrow so
669
            // the secondary-index update below doesn't fight over `self`).
670
            {
671
                let rows_clone = Arc::clone(&self.rows);
2✔
672
                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
4✔
673
                let cell = row_data.get_mut(col_name).ok_or_else(|| {
4✔
674
                    SQLRiteError::Internal(format!("Row storage missing for column '{col_name}'"))
×
675
                })?;
676

677
                match (cell, &value) {
4✔
678
                    // SQL NULL: leave the per-column BTreeMap entry
679
                    // absent. `Row::*::get` returns `None` for missing
680
                    // rowids, which `Table::get_value` relays and the
681
                    // executor's `Identifier` arm renders as
682
                    // `Value::Null`. Mirrors `insert_row`'s NULL path.
683
                    (_, None) => { /* nothing to insert */ }
684
                    (Row::Integer(map), Some(Value::Integer(v))) => {
2✔
685
                        map.insert(rowid, *v as i32);
2✔
686
                    }
687
                    (Row::Text(map), Some(Value::Text(s))) => {
2✔
688
                        map.insert(rowid, s.clone());
2✔
689
                    }
690
                    (Row::Real(map), Some(Value::Real(v))) => {
1✔
691
                        map.insert(rowid, *v as f32);
1✔
692
                    }
693
                    (Row::Bool(map), Some(Value::Bool(v))) => {
1✔
694
                        map.insert(rowid, *v);
1✔
695
                    }
696
                    (Row::Vector(map), Some(Value::Vector(v))) => {
1✔
697
                        map.insert(rowid, v.clone());
1✔
698
                    }
699
                    (row, v) => {
×
700
                        return Err(SQLRiteError::Internal(format!(
×
701
                            "Type mismatch restoring column '{col_name}': storage {row:?} vs value {v:?}"
×
702
                        )));
703
                    }
704
                }
705
            }
706

707
            // Maintain the secondary index (if any). NULL values are skipped
708
            // by `insert`, matching the "NULL is not indexed" convention.
709
            if let Some(v) = &value {
2✔
710
                if let Some(idx) = self.index_for_column_mut(col_name) {
4✔
711
                    idx.insert(v, rowid)?;
2✔
712
                }
713
            }
714
        }
715

716
        if rowid > self.last_rowid {
4✔
717
            self.last_rowid = rowid;
2✔
718
        }
719
        Ok(())
2✔
720
    }
721

722
    /// Extracts a row as an ordered `Vec<Option<Value>>` matching the column
723
    /// declaration order. Returns `None` entries for columns that hold NULL.
724
    /// Used by `save_database` to turn a table's in-memory state into cells.
725
    pub fn extract_row(&self, rowid: i64) -> Vec<Option<Value>> {
2✔
726
        self.columns
2✔
727
            .iter()
728
            .map(|c| match self.get_value(&c.column_name, rowid) {
6✔
729
                Some(Value::Null) => None,
×
730
                Some(v) => Some(v),
2✔
731
                None => None,
1✔
732
            })
733
            .collect()
734
    }
735

736
    /// Overwrites the cell at `(column, rowid)` with `new_val`. Enforces the
737
    /// column's datatype and UNIQUE constraint, and updates any secondary
738
    /// index.
739
    ///
740
    /// Returns `Err` if the column doesn't exist, the value type is incompatible,
741
    /// or writing would violate UNIQUE.
742
    pub fn set_value(&mut self, column: &str, rowid: i64, new_val: Value) -> Result<()> {
1✔
743
        let col_index = self
3✔
744
            .columns
×
745
            .iter()
1✔
746
            .position(|c| c.column_name == column)
3✔
747
            .ok_or_else(|| SQLRiteError::General(format!("Column '{column}' not found")))?;
1✔
748

749
        // No-op write — keep storage exactly the same.
750
        let current = self.get_value(column, rowid);
1✔
751
        if current.as_ref() == Some(&new_val) {
2✔
752
            return Ok(());
×
753
        }
754

755
        // Enforce UNIQUE. Prefer an O(log N) index probe if we have one;
756
        // fall back to a full column scan otherwise (Real/Bool UNIQUE
757
        // columns, which don't get auto-indexed).
758
        if self.columns[col_index].is_unique && !matches!(new_val, Value::Null) {
3✔
759
            if let Some(idx) = self.index_for_column(column) {
1✔
760
                for other in idx.lookup(&new_val) {
3✔
761
                    if other != rowid {
1✔
762
                        return Err(SQLRiteError::General(format!(
1✔
763
                            "UNIQUE constraint violated for column '{column}'"
×
764
                        )));
765
                    }
766
                }
767
            } else {
768
                for other in self.rowids() {
×
769
                    if other == rowid {
×
770
                        continue;
×
771
                    }
772
                    if self.get_value(column, other).as_ref() == Some(&new_val) {
×
773
                        return Err(SQLRiteError::General(format!(
×
774
                            "UNIQUE constraint violated for column '{column}'"
×
775
                        )));
776
                    }
777
                }
778
            }
779
        }
780

781
        // Drop the old index entry before writing the new value, so the
782
        // post-write index insert doesn't clash with the previous state.
783
        if let Some(old) = current {
2✔
784
            if let Some(idx) = self.index_for_column_mut(column) {
2✔
785
                idx.remove(&old, rowid);
×
786
            }
787
        }
788

789
        // Write into the column's Row, type-checking against the declared DataType.
790
        let declared = &self.columns[col_index].datatype;
2✔
791
        {
792
            let rows_clone = Arc::clone(&self.rows);
1✔
793
            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
2✔
794
            let cell = row_data.get_mut(column).ok_or_else(|| {
2✔
795
                SQLRiteError::Internal(format!("Row storage missing for column '{column}'"))
×
796
            })?;
797

798
            match (cell, &new_val, declared) {
2✔
799
                (Row::Integer(m), Value::Integer(v), _) => {
1✔
800
                    m.insert(rowid, *v as i32);
1✔
801
                }
802
                (Row::Real(m), Value::Real(v), _) => {
×
803
                    m.insert(rowid, *v as f32);
×
804
                }
805
                (Row::Real(m), Value::Integer(v), _) => {
×
806
                    m.insert(rowid, *v as f32);
×
807
                }
808
                (Row::Text(m), Value::Text(v), dt) => {
1✔
809
                    // Phase 7e — UPDATE on a JSON column also validates
810
                    // the new text is well-formed JSON, mirroring INSERT.
811
                    if matches!(dt, DataType::Json) {
2✔
812
                        if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
1✔
813
                            return Err(SQLRiteError::General(format!(
2✔
814
                                "Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
×
815
                            )));
816
                        }
817
                    }
818
                    m.insert(rowid, v.clone());
2✔
819
                }
820
                (Row::Bool(m), Value::Bool(v), _) => {
×
821
                    m.insert(rowid, *v);
×
822
                }
823
                (Row::Vector(m), Value::Vector(v), DataType::Vector(declared_dim)) => {
1✔
824
                    if v.len() != *declared_dim {
1✔
825
                        return Err(SQLRiteError::General(format!(
×
826
                            "Vector dimension mismatch for column '{column}': declared {declared_dim}, got {}",
×
827
                            v.len()
×
828
                        )));
829
                    }
830
                    m.insert(rowid, v.clone());
2✔
831
                }
832
                // NULL writes: store the sentinel "Null" string for Text; for other
833
                // types we leave storage as-is since those BTreeMaps can't hold NULL today.
834
                (Row::Text(m), Value::Null, _) => {
×
835
                    m.insert(rowid, "Null".to_string());
×
836
                }
837
                (_, new, dt) => {
×
838
                    return Err(SQLRiteError::General(format!(
×
839
                        "Type mismatch: cannot assign {} to column '{column}' of type {dt}",
×
840
                        new.to_display_string()
×
841
                    )));
842
                }
843
            }
844
        }
845

846
        // Maintain the secondary index, if any. NULL values are skipped by
847
        // insert per convention.
848
        if !matches!(new_val, Value::Null) {
1✔
849
            if let Some(idx) = self.index_for_column_mut(column) {
2✔
850
                idx.insert(&new_val, rowid)?;
×
851
            }
852
        }
853

854
        Ok(())
1✔
855
    }
856

857
    /// Returns an immutable reference of `sql::db::table::Column` if the table contains a
858
    /// column with the specified key as a column name.
859
    ///
860
    #[allow(dead_code)]
861
    pub fn get_column(&mut self, column_name: String) -> Result<&Column> {
×
862
        if let Some(column) = self
×
863
            .columns
×
864
            .iter()
865
            .filter(|c| c.column_name == column_name)
×
866
            .collect::<Vec<&Column>>()
867
            .first()
868
        {
869
            Ok(column)
×
870
        } else {
871
            Err(SQLRiteError::General(String::from("Column not found.")))
×
872
        }
873
    }
874

875
    /// Validates if columns and values being inserted violate the UNIQUE constraint.
876
    /// PRIMARY KEY columns are automatically UNIQUE. Uses the corresponding
877
    /// secondary index when one exists (O(log N) lookup); falls back to a
878
    /// linear scan for indexable-but-not-indexed situations (e.g. a Real
879
    /// UNIQUE column — Real isn't in the auto-indexed set).
880
    pub fn validate_unique_constraint(
2✔
881
        &mut self,
882
        cols: &Vec<String>,
883
        values: &Vec<Option<Value>>,
884
    ) -> Result<()> {
885
        for (idx, name) in cols.iter().enumerate() {
4✔
886
            let column = self
4✔
887
                .columns
×
888
                .iter()
2✔
889
                .find(|c| &c.column_name == name)
6✔
890
                .ok_or_else(|| SQLRiteError::General(format!("Column '{name}' not found")))?;
2✔
891
            if !column.is_unique {
2✔
892
                continue;
×
893
            }
894
            let datatype = &column.datatype;
2✔
895

896
            // Standard SQL UNIQUE allows multiple NULLs — skip the check.
897
            let supplied = match &values[idx] {
2✔
898
                None => continue,
×
899
                Some(v) => v,
2✔
900
            };
901

902
            // Type-check the supplied Value against the column's declared
903
            // datatype. Same shape as the dispatch in `insert_row`: an
904
            // INTEGER column accepts Value::Integer; REAL accepts Real or
905
            // widens Integer; TEXT/JSON accepts Text; BOOL accepts Bool;
906
            // VECTOR accepts Vector with a matching dimension. Anything
907
            // else short-circuits the insert with the same error message
908
            // `insert_row` would emit for the same input.
909
            let parsed: Value = match (datatype, supplied) {
2✔
910
                (DataType::Integer, Value::Integer(n)) => Value::Integer(*n),
2✔
911
                (DataType::Integer, other) => {
×
912
                    return Err(SQLRiteError::General(format!(
×
913
                        "Type mismatch: expected INTEGER for column '{name}', got '{}'",
×
914
                        other.to_display_string()
×
915
                    )));
916
                }
917
                (DataType::Text, Value::Text(s)) => Value::Text(s.clone()),
1✔
918
                (DataType::Text, other) => {
×
919
                    return Err(SQLRiteError::General(format!(
×
920
                        "Type mismatch: expected TEXT for column '{name}', got '{}'",
×
921
                        other.to_display_string()
×
922
                    )));
923
                }
924
                (DataType::Real, Value::Real(f)) => Value::Real(*f),
×
925
                (DataType::Real, Value::Integer(n)) => Value::Real(*n as f64),
×
926
                (DataType::Real, other) => {
×
927
                    return Err(SQLRiteError::General(format!(
×
928
                        "Type mismatch: expected REAL for column '{name}', got '{}'",
×
929
                        other.to_display_string()
×
930
                    )));
931
                }
932
                (DataType::Bool, Value::Bool(b)) => Value::Bool(*b),
×
933
                (DataType::Bool, other) => {
×
934
                    return Err(SQLRiteError::General(format!(
×
935
                        "Type mismatch: expected BOOL for column '{name}', got '{}'",
×
936
                        other.to_display_string()
×
937
                    )));
938
                }
939
                (DataType::Vector(declared_dim), Value::Vector(parsed_vec)) => {
×
940
                    if parsed_vec.len() != *declared_dim {
×
941
                        return Err(SQLRiteError::General(format!(
×
942
                            "Vector dimension mismatch for column '{name}': declared {declared_dim}, got {}",
×
943
                            parsed_vec.len()
×
944
                        )));
945
                    }
946
                    Value::Vector(parsed_vec.clone())
×
947
                }
948
                (DataType::Vector(_), other) => {
×
949
                    return Err(SQLRiteError::General(format!(
×
950
                        "Type mismatch: expected VECTOR for column '{name}', got '{}'",
×
951
                        other.to_display_string()
×
952
                    )));
953
                }
954
                (DataType::Json, Value::Text(s)) => {
×
955
                    // JSON values stored as Text. UNIQUE on a JSON column
956
                    // compares the canonical text representation
957
                    // verbatim — `{"a": 1}` and `{"a":1}` are distinct.
958
                    // Document this if anyone actually requests UNIQUE
959
                    // JSON; for MVP, treat-as-text is fine.
960
                    Value::Text(s.clone())
×
961
                }
962
                (DataType::Json, other) => {
×
963
                    return Err(SQLRiteError::General(format!(
×
964
                        "Type mismatch: expected JSON for column '{name}', got '{}'",
×
965
                        other.to_display_string()
×
966
                    )));
967
                }
968
                (DataType::None | DataType::Invalid, _) => {
×
969
                    return Err(SQLRiteError::Internal(format!(
×
970
                        "column '{name}' has an unsupported datatype"
×
971
                    )));
972
                }
973
            };
974

975
            if let Some(secondary) = self.index_for_column(name) {
4✔
976
                if secondary.would_violate_unique(&parsed) {
4✔
977
                    return Err(SQLRiteError::General(format!(
×
978
                        "UNIQUE constraint violated for column '{name}': value '{}' already exists",
×
979
                        parsed.to_display_string()
×
980
                    )));
981
                }
982
            } else {
983
                // No secondary index (Real / Bool UNIQUE). Linear scan.
984
                for other in self.rowids() {
×
985
                    if self.get_value(name, other).as_ref() == Some(&parsed) {
×
986
                        return Err(SQLRiteError::General(format!(
×
987
                            "UNIQUE constraint violated for column '{name}': value '{}' already exists",
×
988
                            parsed.to_display_string()
×
989
                        )));
990
                    }
991
                }
992
            }
993
        }
994
        Ok(())
2✔
995
    }
996

997
    /// Inserts all VALUES in its approprieta COLUMNS, using the ROWID an embedded INDEX on all ROWS
998
    /// Every `Table` keeps track of the `last_rowid` in order to facilitate what the next one would be.
999
    /// One limitation of this data structure is that we can only have one write transaction at a time, otherwise
1000
    /// we could have a race condition on the last_rowid.
1001
    ///
1002
    /// Since we are loosely modeling after SQLite, this is also a limitation of SQLite (allowing only one write transcation at a time),
1003
    /// So we are good. :)
1004
    ///
1005
    /// Returns `Err` (leaving the table unchanged) when the user supplies an
1006
    /// incompatibly-typed value — no more panics on bad input.
1007
    pub fn insert_row(&mut self, cols: &Vec<String>, values: &Vec<Option<Value>>) -> Result<()> {
2✔
1008
        let mut next_rowid = self.last_rowid + 1;
2✔
1009

1010
        // Auto-assign INTEGER PRIMARY KEY when the user omits it; otherwise
1011
        // adopt the supplied value as the new rowid.
1012
        if self.primary_key != "-1" {
2✔
1013
            if !cols.iter().any(|col| col == &self.primary_key) {
6✔
1014
                // Write the auto-assigned PK into row storage, then sync
1015
                // the secondary index.
1016
                let val = next_rowid as i32;
2✔
1017
                let wrote_integer = {
×
1018
                    let rows_clone = Arc::clone(&self.rows);
2✔
1019
                    let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
4✔
1020
                    let table_col_data = row_data.get_mut(&self.primary_key).ok_or_else(|| {
4✔
1021
                        SQLRiteError::Internal(format!(
×
1022
                            "Row storage missing for primary key column '{}'",
×
1023
                            self.primary_key
×
1024
                        ))
1025
                    })?;
1026
                    match table_col_data {
2✔
1027
                        Row::Integer(tree) => {
2✔
1028
                            tree.insert(next_rowid, val);
2✔
1029
                            true
2✔
1030
                        }
1031
                        _ => false, // non-integer PK: auto-assign is a no-op
×
1032
                    }
1033
                };
1034
                if wrote_integer {
2✔
1035
                    let pk = self.primary_key.clone();
2✔
1036
                    if let Some(idx) = self.index_for_column_mut(&pk) {
4✔
1037
                        idx.insert(&Value::Integer(val as i64), next_rowid)?;
2✔
1038
                    }
1039
                }
1040
            } else {
1041
                for i in 0..cols.len() {
4✔
1042
                    if cols[i] == self.primary_key {
4✔
1043
                        next_rowid = match &values[i] {
4✔
1044
                            Some(Value::Integer(n)) => *n,
2✔
1045
                            None => {
×
1046
                                return Err(SQLRiteError::General(format!(
×
1047
                                    "Type mismatch: PRIMARY KEY column '{}' cannot be NULL",
×
1048
                                    self.primary_key
×
1049
                                )));
1050
                            }
1051
                            Some(other) => {
×
1052
                                return Err(SQLRiteError::General(format!(
×
1053
                                    "Type mismatch: PRIMARY KEY column '{}' expects INTEGER, got '{}'",
×
1054
                                    self.primary_key,
×
1055
                                    other.to_display_string()
×
1056
                                )));
1057
                            }
1058
                        };
1059
                    }
1060
                }
1061
            }
1062
        }
1063

1064
        // For every table column, either pick the supplied value or pad with NULL
1065
        // so that every column's BTreeMap keeps the same rowid keyset.
1066
        let column_names = self
2✔
1067
            .columns
×
1068
            .iter()
1069
            .map(|col| col.column_name.to_string())
6✔
1070
            .collect::<Vec<String>>();
1071
        let mut j: usize = 0;
2✔
1072
        for i in 0..column_names.len() {
4✔
1073
            // `None` means SQL NULL: leave the column's BTreeMap entry
1074
            // absent so reads come back as Value::Null via the missing-
1075
            // rowid path.
1076
            let mut val: Option<Value> = None;
2✔
1077
            let key = &column_names[i];
4✔
1078
            let mut column_supplied = false;
2✔
1079

1080
            if let Some(supplied_key) = cols.get(j) {
2✔
1081
                if supplied_key == &column_names[i] {
6✔
1082
                    val = values[j].clone();
4✔
1083
                    column_supplied = true;
2✔
1084
                    j += 1;
2✔
1085
                } else if self.primary_key == column_names[i] {
4✔
1086
                    // PK already stored in the auto-assign branch above.
1087
                    continue;
×
1088
                }
1089
            } else if self.primary_key == column_names[i] {
2✔
1090
                continue;
×
1091
            }
1092

1093
            // Column was omitted from the INSERT column list. Substitute its
1094
            // DEFAULT literal if one was declared at CREATE TABLE time;
1095
            // otherwise it stays as None. SQLite semantics: an *explicit*
1096
            // NULL is preserved as NULL — the default only fires for
1097
            // omitted columns. `DEFAULT NULL` is treated as no default.
1098
            if !column_supplied {
3✔
1099
                val = self.columns[i]
4✔
1100
                    .default
×
1101
                    .clone()
1✔
1102
                    .filter(|v| !matches!(v, Value::Null));
3✔
1103
            }
1104

1105
            // Step 1: write into row storage and compute the typed Value
1106
            // we'll hand to the secondary index (if any).
1107
            let typed_value: Option<Value> = {
×
1108
                let rows_clone = Arc::clone(&self.rows);
4✔
1109
                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
4✔
1110
                let table_col_data = row_data.get_mut(key).ok_or_else(|| {
4✔
1111
                    SQLRiteError::Internal(format!("Row storage missing for column '{key}'"))
×
1112
                })?;
1113

1114
                match (table_col_data, &val) {
4✔
1115
                    // SQL NULL: leave the BTreeMap entry absent. Indexes are
1116
                    // skipped (Step 2 below short-circuits on None).
1117
                    (_, None) => None,
1✔
1118

1119
                    (Row::Integer(tree), Some(Value::Integer(n))) => {
2✔
1120
                        tree.insert(next_rowid, *n as i32);
2✔
1121
                        Some(Value::Integer(*n))
2✔
1122
                    }
1123
                    (Row::Integer(_), Some(other)) => {
1✔
1124
                        return Err(SQLRiteError::General(format!(
1✔
1125
                            "Type mismatch: expected INTEGER for column '{key}', got '{}'",
×
1126
                            other.to_display_string()
1✔
1127
                        )));
1128
                    }
1129

1130
                    (Row::Text(tree), Some(Value::Text(s))) => {
2✔
1131
                        // Phase 7e — JSON columns share Row::Text storage.
1132
                        // Validate the value parses as JSON before storing;
1133
                        // otherwise we'd happily write `not-json-at-all`
1134
                        // and only fail when json_extract tried to parse
1135
                        // it later.
1136
                        if matches!(self.columns[i].datatype, DataType::Json) {
4✔
1137
                            if let Err(e) = serde_json::from_str::<serde_json::Value>(s) {
1✔
1138
                                return Err(SQLRiteError::General(format!(
2✔
1139
                                    "Type mismatch: expected JSON for column '{key}', got '{s}': {e}"
×
1140
                                )));
1141
                            }
1142
                        }
1143
                        tree.insert(next_rowid, s.clone());
4✔
1144
                        Some(Value::Text(s.clone()))
2✔
1145
                    }
1146
                    (Row::Text(_), Some(other)) => {
×
1147
                        let label = if matches!(self.columns[i].datatype, DataType::Json) {
×
1148
                            "JSON"
×
1149
                        } else {
1150
                            "TEXT"
×
1151
                        };
1152
                        return Err(SQLRiteError::General(format!(
×
1153
                            "Type mismatch: expected {label} for column '{key}', got '{}'",
×
1154
                            other.to_display_string()
×
1155
                        )));
1156
                    }
1157

1158
                    (Row::Real(tree), Some(Value::Real(f))) => {
1✔
1159
                        let f32_val = *f as f32;
1✔
1160
                        tree.insert(next_rowid, f32_val);
1✔
1161
                        Some(Value::Real(*f))
1✔
1162
                    }
1163
                    // Allow integer literals to widen into REAL columns
1164
                    // (matches the previous string-parse behavior where
1165
                    // `INSERT … VALUES (42)` into a REAL column worked).
1166
                    (Row::Real(tree), Some(Value::Integer(n))) => {
1✔
1167
                        let f32_val = *n as f32;
1✔
1168
                        tree.insert(next_rowid, f32_val);
1✔
1169
                        Some(Value::Real(*n as f64))
1✔
1170
                    }
1171
                    (Row::Real(_), Some(other)) => {
×
1172
                        return Err(SQLRiteError::General(format!(
×
1173
                            "Type mismatch: expected REAL for column '{key}', got '{}'",
×
1174
                            other.to_display_string()
×
1175
                        )));
1176
                    }
1177

1178
                    (Row::Bool(tree), Some(Value::Bool(b))) => {
1✔
1179
                        tree.insert(next_rowid, *b);
1✔
1180
                        Some(Value::Bool(*b))
1✔
1181
                    }
1182
                    (Row::Bool(_), Some(other)) => {
×
1183
                        return Err(SQLRiteError::General(format!(
×
1184
                            "Type mismatch: expected BOOL for column '{key}', got '{}'",
×
1185
                            other.to_display_string()
×
1186
                        )));
1187
                    }
1188

1189
                    (Row::Vector(tree), Some(Value::Vector(parsed))) => {
1✔
1190
                        // The parser already turned a bracket-array literal
1191
                        // into a typed Value::Vector. We still need to
1192
                        // dim-check against the column's declared
1193
                        // DataType::Vector(N).
1194
                        let declared_dim = match &self.columns[i].datatype {
2✔
1195
                            DataType::Vector(d) => *d,
1✔
1196
                            other => {
×
1197
                                return Err(SQLRiteError::Internal(format!(
×
1198
                                    "Row::Vector storage on non-Vector column '{key}' (declared as {other})"
×
1199
                                )));
1200
                            }
1201
                        };
1202
                        if parsed.len() != declared_dim {
2✔
1203
                            return Err(SQLRiteError::General(format!(
1✔
1204
                                "Vector dimension mismatch for column '{key}': declared {declared_dim}, got {}",
×
1205
                                parsed.len()
2✔
1206
                            )));
1207
                        }
1208
                        tree.insert(next_rowid, parsed.clone());
2✔
1209
                        Some(Value::Vector(parsed.clone()))
1✔
1210
                    }
1211
                    (Row::Vector(_), Some(other)) => {
×
1212
                        return Err(SQLRiteError::General(format!(
×
1213
                            "Type mismatch: expected VECTOR for column '{key}', got '{}'",
×
1214
                            other.to_display_string()
×
1215
                        )));
1216
                    }
1217

1218
                    (Row::None, _) => {
×
1219
                        return Err(SQLRiteError::Internal(format!(
×
1220
                            "Column '{key}' has no row storage"
×
1221
                        )));
1222
                    }
1223
                }
1224
            };
1225

1226
            // Step 2: maintain the secondary index (if any). insert() is a
1227
            // no-op for Value::Null and cheap for other value kinds.
1228
            if let Some(v) = typed_value.clone() {
4✔
1229
                if let Some(idx) = self.index_for_column_mut(key) {
4✔
1230
                    idx.insert(&v, next_rowid)?;
4✔
1231
                }
1232
            }
1233

1234
            // Step 3 (Phase 7d.2): maintain any HNSW indexes on this column.
1235
            // The HNSW algorithm needs access to other rows' vectors when
1236
            // wiring up neighbor edges, so build a get_vec closure that
1237
            // pulls from the table's row storage (which we *just* updated
1238
            // with the new value).
1239
            if let Some(Value::Vector(new_vec)) = &typed_value {
5✔
1240
                self.maintain_hnsw_on_insert(key, next_rowid, new_vec)?;
1✔
1241
            }
1242

1243
            // Step 4 (Phase 8b): maintain any FTS indexes on this column.
1244
            // Cheap incremental update — PostingList::insert tokenizes
1245
            // the value and adds postings under the new rowid. DELETE
1246
            // and UPDATE take the rebuild-on-save path instead (Q7).
1247
            if let Some(Value::Text(text)) = &typed_value {
6✔
1248
                self.maintain_fts_on_insert(key, next_rowid, text);
2✔
1249
            }
1250
        }
1251
        self.last_rowid = next_rowid;
2✔
1252
        Ok(())
2✔
1253
    }
1254

1255
    /// After a row insert, push the new (rowid, vector) into every HNSW
1256
    /// index whose column matches `column`. Split out of `insert_row` so
1257
    /// the borrowing dance — we need both `&self.rows` (read other
1258
    /// vectors) and `&mut self.hnsw_indexes` (insert into the graph) —
1259
    /// stays localized.
1260
    fn maintain_hnsw_on_insert(&mut self, column: &str, rowid: i64, new_vec: &[f32]) -> Result<()> {
1✔
1261
        self.rebuild_dirty_hnsw_indexes()?;
1✔
1262

1263
        // Snapshot the current vector storage so the get_vec closure
1264
        // doesn't fight with `&mut self.hnsw_indexes`. For a typical
1265
        // HNSW insert we touch ef_construction × log(N) other vectors,
1266
        // so the snapshot cost is small relative to the graph wiring.
1267
        let mut vec_snapshot: HashMap<i64, Vec<f32>> = HashMap::new();
1✔
1268
        {
1269
            let row_data = self.rows.lock().expect("rows mutex poisoned");
2✔
1270
            if let Some(Row::Vector(map)) = row_data.get(column) {
3✔
1271
                for (id, v) in map.iter() {
1✔
1272
                    vec_snapshot.insert(*id, v.clone());
1✔
1273
                }
1274
            }
1275
        }
1276
        // The new row was just written into row storage — make sure the
1277
        // snapshot reflects it (it should, but defensive).
1278
        vec_snapshot.insert(rowid, new_vec.to_vec());
1✔
1279

1280
        for entry in &mut self.hnsw_indexes {
1✔
1281
            if entry.column_name == column {
2✔
1282
                entry.index.insert(rowid, new_vec, |id| {
2✔
1283
                    vec_snapshot.get(&id).cloned().unwrap_or_default()
1✔
1284
                })?;
1285
            }
1286
        }
1287
        Ok(())
1✔
1288
    }
1289

1290
    /// Rebuilds any dirty HNSW index on this table from the current
1291
    /// vector column storage. DELETE / UPDATE mark indexes dirty because
1292
    /// stale graph edges may still point at removed rowids; this makes
1293
    /// the next in-memory operation see a clean graph without requiring
1294
    /// a close/reopen or save round-trip.
1295
    pub fn rebuild_dirty_hnsw_indexes(&mut self) -> Result<()> {
2✔
1296
        let dirty: Vec<(String, String, DistanceMetric)> = self
4✔
NEW
1297
            .hnsw_indexes
×
1298
            .iter()
1299
            .filter(|e| e.needs_rebuild)
4✔
1300
            .map(|e| (e.name.clone(), e.column_name.clone(), e.metric))
4✔
1301
            .collect();
1302
        if dirty.is_empty() {
4✔
1303
            return Ok(());
2✔
1304
        }
1305

1306
        for (idx_name, col_name, metric) in dirty {
3✔
1307
            let mut vectors: Vec<(i64, Vec<f32>)> = Vec::new();
1✔
1308
            {
1309
                let row_data = self.rows.lock().expect("rows mutex poisoned");
2✔
1310
                if let Some(Row::Vector(map)) = row_data.get(&col_name) {
3✔
1311
                    for (id, v) in map.iter() {
1✔
1312
                        vectors.push((*id, v.clone()));
1✔
1313
                    }
1314
                }
1315
            }
1316

1317
            let snapshot: HashMap<i64, Vec<f32>> = vectors.iter().cloned().collect();
1✔
1318
            let mut new_idx = HnswIndex::new(metric, 0xC0FFEE);
2✔
1319
            vectors.sort_by_key(|(id, _)| *id);
4✔
1320
            for (id, v) in &vectors {
1✔
1321
                new_idx.insert(*id, v, |q| snapshot.get(&q).cloned().unwrap_or_default())?;
4✔
1322
            }
1323

1324
            if let Some(entry) = self.hnsw_indexes.iter_mut().find(|e| e.name == idx_name) {
4✔
1325
                entry.index = new_idx;
1✔
1326
                entry.needs_rebuild = false;
1✔
1327
            }
1328
        }
1329
        Ok(())
1✔
1330
    }
1331

1332
    /// After a row insert, push the new (rowid, text) into every FTS
1333
    /// index whose column matches `column`. Phase 8b.
1334
    ///
1335
    /// Mirrors [`Self::maintain_hnsw_on_insert`] but the FTS index is
1336
    /// self-contained — `PostingList::insert` only needs the new doc's
1337
    /// text, not the rest of the corpus, so there's no snapshot dance.
1338
    fn maintain_fts_on_insert(&mut self, column: &str, rowid: i64, text: &str) {
2✔
1339
        for entry in &mut self.fts_indexes {
4✔
1340
            if entry.column_name == column {
1✔
1341
                entry.index.insert(rowid, text);
1✔
1342
            }
1343
        }
1344
    }
1345

1346
    /// Print the table schema to standard output in a pretty formatted way.
1347
    ///
1348
    /// # Example
1349
    ///
1350
    /// ```text
1351
    /// let table = Table::new(payload);
1352
    /// table.print_table_schema();
1353
    ///
1354
    /// Prints to standard output:
1355
    ///    +-------------+-----------+-------------+--------+----------+
1356
    ///    | Column Name | Data Type | PRIMARY KEY | UNIQUE | NOT NULL |
1357
    ///    +-------------+-----------+-------------+--------+----------+
1358
    ///    | id          | Integer   | true        | true   | true     |
1359
    ///    +-------------+-----------+-------------+--------+----------+
1360
    ///    | name        | Text      | false       | true   | false    |
1361
    ///    +-------------+-----------+-------------+--------+----------+
1362
    ///    | email       | Text      | false       | false  | false    |
1363
    ///    +-------------+-----------+-------------+--------+----------+
1364
    /// ```
1365
    ///
1366
    pub fn print_table_schema(&self) -> Result<usize> {
1✔
1367
        let mut table = PrintTable::new();
1✔
1368
        table.add_row(row![
3✔
UNCOV
1369
            "Column Name",
×
UNCOV
1370
            "Data Type",
×
UNCOV
1371
            "PRIMARY KEY",
×
UNCOV
1372
            "UNIQUE",
×
UNCOV
1373
            "NOT NULL"
×
1374
        ]);
1375

1376
        for col in &self.columns {
1✔
1377
            table.add_row(row![
7✔
1378
                col.column_name,
1✔
UNCOV
1379
                col.datatype,
×
1380
                col.is_pk,
1✔
1381
                col.is_unique,
1✔
1382
                col.not_null
1✔
1383
            ]);
1384
        }
1385

1386
        table.printstd();
1✔
1387
        Ok(table.len() * 2 + 1)
1✔
1388
    }
1389

1390
    /// Print the table data to standard output in a pretty formatted way.
1391
    ///
1392
    /// # Example
1393
    ///
1394
    /// ```text
1395
    /// let db_table = db.get_table_mut(table_name.to_string()).unwrap();
1396
    /// db_table.print_table_data();
1397
    ///
1398
    /// Prints to standard output:
1399
    ///     +----+---------+------------------------+
1400
    ///     | id | name    | email                  |
1401
    ///     +----+---------+------------------------+
1402
    ///     | 1  | "Jack"  | "jack@mail.com"        |
1403
    ///     +----+---------+------------------------+
1404
    ///     | 10 | "Bob"   | "bob@main.com"         |
1405
    ///     +----+---------+------------------------+
1406
    ///     | 11 | "Bill"  | "bill@main.com"        |
1407
    ///     +----+---------+------------------------+
1408
    /// ```
1409
    ///
UNCOV
1410
    pub fn print_table_data(&self) {
×
UNCOV
1411
        let mut print_table = PrintTable::new();
×
1412

UNCOV
1413
        let column_names = self
×
1414
            .columns
×
1415
            .iter()
1416
            .map(|col| col.column_name.to_string())
×
1417
            .collect::<Vec<String>>();
1418

1419
        let header_row = PrintRow::new(
UNCOV
1420
            column_names
×
UNCOV
1421
                .iter()
×
UNCOV
1422
                .map(|col| PrintCell::new(col))
×
UNCOV
1423
                .collect::<Vec<PrintCell>>(),
×
1424
        );
1425

UNCOV
1426
        let rows_clone = Arc::clone(&self.rows);
×
UNCOV
1427
        let row_data = rows_clone.lock().expect("rows mutex poisoned");
×
UNCOV
1428
        let first_col_data = row_data
×
UNCOV
1429
            .get(&self.columns.first().unwrap().column_name)
×
1430
            .unwrap();
UNCOV
1431
        let num_rows = first_col_data.count();
×
UNCOV
1432
        let mut print_table_rows: Vec<PrintRow> = vec![PrintRow::new(vec![]); num_rows];
×
1433

UNCOV
1434
        for col_name in &column_names {
×
UNCOV
1435
            let col_val = row_data
×
UNCOV
1436
                .get(col_name)
×
1437
                .expect("Can't find any rows with the given column");
UNCOV
1438
            let columns: Vec<String> = col_val.get_serialized_col_data();
×
1439

UNCOV
1440
            for i in 0..num_rows {
×
UNCOV
1441
                if let Some(cell) = &columns.get(i) {
×
UNCOV
1442
                    print_table_rows[i].add_cell(PrintCell::new(cell));
×
1443
                } else {
UNCOV
1444
                    print_table_rows[i].add_cell(PrintCell::new(""));
×
1445
                }
1446
            }
1447
        }
1448

UNCOV
1449
        print_table.add_row(header_row);
×
UNCOV
1450
        for row in print_table_rows {
×
UNCOV
1451
            print_table.add_row(row);
×
1452
        }
1453

UNCOV
1454
        print_table.printstd();
×
1455
    }
1456
}
1457

1458
/// The schema for each SQL column in every table.
1459
///
1460
/// Per-column index state moved to `Table::secondary_indexes` in Phase 3e —
1461
/// a single `Column` describes the declared schema (name, type, constraints)
1462
/// and nothing more.
1463
#[derive(PartialEq, Debug, Clone)]
1464
pub struct Column {
1465
    pub column_name: String,
1466
    pub datatype: DataType,
1467
    pub is_pk: bool,
1468
    pub not_null: bool,
1469
    pub is_unique: bool,
1470
    /// Literal value to substitute when this column is omitted from an
1471
    /// INSERT. Restricted to literal expressions at CREATE TABLE time.
1472
    /// `None` means "no DEFAULT declared"; an INSERT that omits the column
1473
    /// gets `Value::Null` instead.
1474
    pub default: Option<Value>,
1475
}
1476

1477
impl Column {
1478
    /// Builds a `Column` without a `DEFAULT` clause. Existing call sites
1479
    /// (catalog-table setup, test fixtures) keep working unchanged.
1480
    pub fn new(
2✔
1481
        name: String,
1482
        datatype: String,
1483
        is_pk: bool,
1484
        not_null: bool,
1485
        is_unique: bool,
1486
    ) -> Self {
1487
        Self::with_default(name, datatype, is_pk, not_null, is_unique, None)
2✔
1488
    }
1489

1490
    /// Builds a `Column` with an optional `DEFAULT` literal. Used by the
1491
    /// CREATE TABLE / `parse_create_sql` paths that propagate user-supplied
1492
    /// defaults from `ParsedColumn`.
1493
    pub fn with_default(
2✔
1494
        name: String,
1495
        datatype: String,
1496
        is_pk: bool,
1497
        not_null: bool,
1498
        is_unique: bool,
1499
        default: Option<Value>,
1500
    ) -> Self {
1501
        let dt = DataType::new(datatype);
4✔
1502
        Column {
1503
            column_name: name,
1504
            datatype: dt,
1505
            is_pk,
1506
            not_null,
1507
            is_unique,
1508
            default,
1509
        }
1510
    }
1511
}
1512

1513
/// In-memory storage for one column's values across all rows.
///
/// Each variant wraps a `BTreeMap` that uses the ROWID as key and the
/// column's native value type as value, so iteration is always in
/// ascending ROWID order.
#[derive(PartialEq, Debug, Clone)]
pub enum Row {
    /// Integer column values, stored as `i32`.
    Integer(BTreeMap<i64, i32>),
    /// Text column values.
    Text(BTreeMap<i64, String>),
    /// Real column values, stored as `f32`.
    Real(BTreeMap<i64, f32>),
    /// Boolean column values.
    Bool(BTreeMap<i64, bool>),
    /// Phase 7a: dense f32 vector storage. Each `Vec<f32>` should have
    /// length matching the column's declared `DataType::Vector(dim)`,
    /// enforced at INSERT time. The Row variant doesn't carry the dim —
    /// it lives in the column metadata.
    Vector(BTreeMap<i64, Vec<f32>>),
    /// No storage at all.
    None,
}
1531

1532
impl Row {
UNCOV
1533
    fn get_serialized_col_data(&self) -> Vec<String> {
×
UNCOV
1534
        match self {
×
UNCOV
1535
            Row::Integer(cd) => cd.values().map(|v| v.to_string()).collect(),
×
UNCOV
1536
            Row::Real(cd) => cd.values().map(|v| v.to_string()).collect(),
×
UNCOV
1537
            Row::Text(cd) => cd.values().map(|v| v.to_string()).collect(),
×
UNCOV
1538
            Row::Bool(cd) => cd.values().map(|v| v.to_string()).collect(),
×
UNCOV
1539
            Row::Vector(cd) => cd.values().map(format_vector_for_display).collect(),
×
UNCOV
1540
            Row::None => panic!("Found None in columns"),
×
1541
        }
1542
    }
1543

UNCOV
1544
    fn count(&self) -> usize {
×
UNCOV
1545
        match self {
×
UNCOV
1546
            Row::Integer(cd) => cd.len(),
×
UNCOV
1547
            Row::Real(cd) => cd.len(),
×
UNCOV
1548
            Row::Text(cd) => cd.len(),
×
UNCOV
1549
            Row::Bool(cd) => cd.len(),
×
UNCOV
1550
            Row::Vector(cd) => cd.len(),
×
UNCOV
1551
            Row::None => panic!("Found None in columns"),
×
1552
        }
1553
    }
1554

1555
    /// Every column's BTreeMap is keyed by ROWID. All columns share the same keyset
1556
    /// after an INSERT (missing columns are padded), so any column's keys are a valid
1557
    /// iteration of the table's rowids.
1558
    pub fn rowids(&self) -> Vec<i64> {
2✔
1559
        match self {
2✔
1560
            Row::Integer(m) => m.keys().copied().collect(),
2✔
1561
            Row::Text(m) => m.keys().copied().collect(),
2✔
UNCOV
1562
            Row::Real(m) => m.keys().copied().collect(),
×
UNCOV
1563
            Row::Bool(m) => m.keys().copied().collect(),
×
UNCOV
1564
            Row::Vector(m) => m.keys().copied().collect(),
×
UNCOV
1565
            Row::None => vec![],
×
1566
        }
1567
    }
1568

1569
    pub fn get(&self, rowid: i64) -> Option<Value> {
2✔
1570
        match self {
2✔
1571
            Row::Integer(m) => m.get(&rowid).map(|v| Value::Integer(i64::from(*v))),
6✔
1572
            // INSERT stores the literal string "Null" in Text columns that were omitted
1573
            // from the query — re-map that back to a real NULL on read.
1574
            Row::Text(m) => m.get(&rowid).map(|v| {
4✔
1575
                if v == "Null" {
4✔
UNCOV
1576
                    Value::Null
×
1577
                } else {
1578
                    Value::Text(v.clone())
2✔
1579
                }
1580
            }),
1581
            Row::Real(m) => m.get(&rowid).map(|v| Value::Real(f64::from(*v))),
3✔
1582
            Row::Bool(m) => m.get(&rowid).map(|v| Value::Bool(*v)),
3✔
1583
            Row::Vector(m) => m.get(&rowid).map(|v| Value::Vector(v.clone())),
3✔
1584
            Row::None => None,
×
1585
        }
1586
    }
1587
}
1588

1589
/// Formats a vector as human-readable text. Shared by
/// `Row::get_serialized_col_data` (the REPL's print-table path) and
/// `Value::to_display_string`.
///
/// Output looks like `[0.1, 0.2, 0.3]`: elements separated by `", "` and
/// wrapped in square brackets; an empty vector renders as `[]`. Each element
/// uses the default `f32` `Display`, which picks the shortest round-trip
/// form (`0.1f32` prints as `0.1`). High-dimensional vectors produce one
/// long line; truncation is a future polish (see Phase 7 plan, "What this
/// proposal does NOT commit to").
fn format_vector_for_display(v: &Vec<f32>) -> String {
    let rendered: Vec<String> = v.iter().map(|x| x.to_string()).collect();
    format!("[{}]", rendered.join(", "))
}
1610

1611
/// A typed value flowing through query execution.
///
/// Kept separate from the storage-side `Row` enum so the executor can pass
/// typed values — including an explicit NULL — between operators.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// 64-bit signed integer.
    Integer(i64),
    /// Owned text.
    Text(String),
    /// 64-bit float.
    Real(f64),
    /// Boolean.
    Bool(bool),
    /// Phase 7a: dense f32 vector as a runtime value. Carries its own
    /// dimension implicitly via `Vec::len`; the column it's being
    /// assigned to has a declared `DataType::Vector(N)` that's checked
    /// at INSERT/UPDATE time.
    Vector(Vec<f32>),
    /// SQL NULL.
    Null,
}
1626

1627
impl Value {
1628
    pub fn to_display_string(&self) -> String {
1✔
1629
        match self {
1✔
1630
            Value::Integer(v) => v.to_string(),
1✔
1631
            Value::Text(s) => s.clone(),
1✔
UNCOV
1632
            Value::Real(f) => f.to_string(),
×
UNCOV
1633
            Value::Bool(b) => b.to_string(),
×
1634
            Value::Vector(v) => format_vector_for_display(v),
1✔
1635
            Value::Null => String::from("NULL"),
1636
        }
1637
    }
1638
}
1639

1640
/// Parse a bracket-array literal like `"[0.1, 0.2, 0.3]"` (or `"[1, 2, 3]"`)
1641
/// into a `Vec<f32>`. The parser/insert pipeline stores vector literals as
1642
/// strings in `InsertQuery::rows` (a `Vec<Vec<String>>`); this helper is
1643
/// the inverse — turn the string back into a typed vector at the boundary
1644
/// where we actually need element-typed data.
1645
///
1646
/// Accepts:
1647
/// - `[]` → empty vector (caller's dimension check rejects it for VECTOR(N≥1))
1648
/// - `[0.1, 0.2, 0.3]` → standard float syntax
1649
/// - `[1, 2, 3]` → integers, coerced to f32 (matches `VALUES (1, 2)` for
1650
///   `REAL` columns; we widen ints to floats automatically)
1651
/// - whitespace tolerated everywhere (Python/JSON/pgvector convention)
1652
///
1653
/// Rejects with a descriptive message:
1654
/// - missing `[` / `]`
1655
/// - non-numeric elements (`['foo', 0.1]`)
1656
/// - NaN / Inf literals (we accept them via `f32::from_str` but caller can
1657
///   reject if undesired — for now we let them through; HNSW etc. will
1658
///   reject NaN at index time)
1659
pub fn parse_vector_literal(s: &str) -> Result<Vec<f32>> {
1✔
1660
    let trimmed = s.trim();
1✔
1661
    if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
2✔
1662
        return Err(SQLRiteError::General(format!(
1✔
1663
            "expected bracket-array literal `[...]`, got `{s}`"
1664
        )));
1665
    }
1666
    let inner = &trimmed[1..trimmed.len() - 1].trim();
2✔
1667
    if inner.is_empty() {
1✔
1668
        return Ok(Vec::new());
1✔
1669
    }
1670
    let mut out = Vec::new();
1✔
1671
    for (i, part) in inner.split(',').enumerate() {
2✔
1672
        let element = part.trim();
2✔
1673
        let parsed: f32 = element.parse().map_err(|_| {
3✔
1674
            SQLRiteError::General(format!("vector element {i} (`{element}`) is not a number"))
1✔
1675
        })?;
1676
        out.push(parsed);
1✔
1677
    }
1678
    Ok(out)
1✔
1679
}
1680

1681
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sql::dialect::SqlriteDialect;
    use sqlparser::parser::Parser;

    /// Parses `sql` (expected to hold one `CREATE TABLE` statement) with the
    /// SQLRite dialect and builds the in-memory `Table` from it.
    fn table_from_create_sql(sql: &str) -> Table {
        let dialect = SqlriteDialect::new();
        let mut ast = Parser::parse_sql(&dialect, sql).unwrap();
        if ast.len() > 1 {
            panic!("Expected a single query statement, but there are more than 1.")
        }
        let query = ast.pop().unwrap();
        let create_query = CreateQuery::new(&query).unwrap();
        Table::new(create_query)
    }

    #[test]
    fn datatype_display_trait_test() {
        let integer = DataType::Integer;
        let text = DataType::Text;
        let real = DataType::Real;
        let boolean = DataType::Bool;
        let vector = DataType::Vector(384);
        let none = DataType::None;
        let invalid = DataType::Invalid;

        assert_eq!(format!("{}", integer), "Integer");
        assert_eq!(format!("{}", text), "Text");
        assert_eq!(format!("{}", real), "Real");
        assert_eq!(format!("{}", boolean), "Boolean");
        assert_eq!(format!("{}", vector), "Vector(384)");
        assert_eq!(format!("{}", none), "None");
        assert_eq!(format!("{}", invalid), "Invalid");
    }

    // -----------------------------------------------------------------
    // Phase 7a — VECTOR(N) column type
    // -----------------------------------------------------------------

    #[test]
    fn datatype_new_parses_vector_dim() {
        // Standard cases.
        assert_eq!(DataType::new("vector(1)".to_string()), DataType::Vector(1));
        assert_eq!(
            DataType::new("vector(384)".to_string()),
            DataType::Vector(384)
        );
        assert_eq!(
            DataType::new("vector(1536)".to_string()),
            DataType::Vector(1536)
        );

        // Case-insensitive on the keyword.
        assert_eq!(
            DataType::new("VECTOR(384)".to_string()),
            DataType::Vector(384)
        );

        // Whitespace inside parens tolerated (the create-parser strips it
        // but the string-based round-trip in DataType::new is the one place
        // we don't fully control input formatting).
        assert_eq!(
            DataType::new("vector( 64 )".to_string()),
            DataType::Vector(64)
        );
    }

    #[test]
    fn datatype_new_rejects_bad_vector_strings() {
        // dim = 0 is rejected (Q2: VECTOR(N≥1)).
        assert_eq!(DataType::new("vector(0)".to_string()), DataType::Invalid);
        // Non-numeric dim.
        assert_eq!(DataType::new("vector(abc)".to_string()), DataType::Invalid);
        // Empty parens.
        assert_eq!(DataType::new("vector()".to_string()), DataType::Invalid);
        // Negative dim wouldn't even parse as usize, so falls into Invalid.
        assert_eq!(DataType::new("vector(-3)".to_string()), DataType::Invalid);
    }

    #[test]
    fn datatype_to_wire_string_round_trips_vector() {
        let dt = DataType::Vector(384);
        let wire = dt.to_wire_string();
        assert_eq!(wire, "vector(384)");
        // And feeds back through DataType::new losslessly — this is the
        // round-trip the ParsedColumn pipeline relies on.
        assert_eq!(DataType::new(wire), DataType::Vector(384));
    }

    #[test]
    fn parse_vector_literal_accepts_floats() {
        let v = parse_vector_literal("[0.1, 0.2, 0.3]").expect("parse");
        assert_eq!(v, vec![0.1f32, 0.2, 0.3]);
    }

    #[test]
    fn parse_vector_literal_accepts_ints_widening_to_f32() {
        let v = parse_vector_literal("[1, 2, 3]").expect("parse");
        assert_eq!(v, vec![1.0f32, 2.0, 3.0]);
    }

    #[test]
    fn parse_vector_literal_handles_negatives_and_whitespace() {
        let v = parse_vector_literal("[ -1.5 ,  2.0,  -3.5 ]").expect("parse");
        assert_eq!(v, vec![-1.5f32, 2.0, -3.5]);
    }

    #[test]
    fn parse_vector_literal_empty_brackets_is_empty_vec() {
        let v = parse_vector_literal("[]").expect("parse");
        assert!(v.is_empty());
    }

    #[test]
    fn parse_vector_literal_rejects_non_bracketed() {
        assert!(parse_vector_literal("0.1, 0.2").is_err());
        assert!(parse_vector_literal("(0.1, 0.2)").is_err());
        assert!(parse_vector_literal("[0.1, 0.2").is_err()); // missing ]
        assert!(parse_vector_literal("0.1, 0.2]").is_err()); // missing [
    }

    #[test]
    fn parse_vector_literal_rejects_non_numeric_elements() {
        let err = parse_vector_literal("[1.0, 'foo', 3.0]").unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("vector element 1") && msg.contains("'foo'"),
            "error message should pinpoint the bad element: got `{msg}`"
        );
    }

    #[test]
    fn value_vector_display_format() {
        let v = Value::Vector(vec![0.1, 0.2, 0.3]);
        assert_eq!(v.to_display_string(), "[0.1, 0.2, 0.3]");

        // Empty vector displays as `[]`.
        let empty = Value::Vector(vec![]);
        assert_eq!(empty.to_display_string(), "[]");
    }

    #[test]
    fn create_new_table_test() {
        let table = table_from_create_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl,
            email TEXT NOT NULL UNIQUE,
            active BOOL,
            score REAL
        );",
        );

        assert_eq!(table.columns.len(), 6);
        assert_eq!(table.last_rowid, 0);

        let id_column = table
            .columns
            .iter()
            .find(|c| c.column_name == "id")
            .expect("column not found");
        assert!(id_column.is_pk);
        assert_eq!(id_column.datatype, DataType::Integer);
    }

    #[test]
    fn print_table_schema_test() {
        let table = table_from_create_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl
        );",
        );

        // Header + 3 columns = 4 rows, rendered as 4 * 2 + 1 lines.
        let lines_printed = table.print_table_schema();
        assert_eq!(lines_printed, Ok(9));
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc