• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

joaoh82 / rust_sqlite / 25278185540

03 May 2026 11:41AM UTC coverage: 56.503% (-0.4%) from 56.877%
25278185540

Pull #76

github

web-flow
Merge d243178fb into 91ca6317d
Pull Request #76: cleanup(engine): make process_command stdout-clean (drop REPL-only prints)

20 of 30 new or added lines in 4 files covered. (66.67%)

38 existing lines in 3 files now uncovered.

5465 of 9672 relevant lines covered (56.5%)

1.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.85
/src/sql/db/table.rs
1
use crate::error::{Result, SQLRiteError};
2
use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
3
use crate::sql::hnsw::HnswIndex;
4
use crate::sql::parser::create::CreateQuery;
5
use std::collections::{BTreeMap, HashMap};
6
use std::fmt;
7
use std::sync::{Arc, Mutex};
8

9
use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
10

11
/// SQLRite column data types.
///
/// Modelled on SQLite's storage classes and type affinities; see
/// [Datatypes In SQLite Version 3](https://www.sqlite.org/datatype3.html).
///
/// `Vector(dim)` is the Phase 7a addition — a fixed-dimension dense f32
/// array. The dimension is part of the type so a `VECTOR(384)` column
/// rejects `[0.1, 0.2, 0.3]` at INSERT time as a clean type error
/// rather than silently storing the wrong shape.
#[derive(PartialEq, Debug, Clone)]
pub enum DataType {
    Integer,
    Text,
    Real,
    Bool,
    /// Dense f32 vector of fixed dimension. The `usize` is the column's
    /// declared dimension; every value stored in the column must have
    /// exactly that many elements.
    Vector(usize),
    /// Phase 7e — JSON column. Stored as canonical UTF-8 text (matches
    /// SQLite's JSON1 extension), validated at INSERT time. The
    /// `json_extract` family of functions parses on demand and returns
    /// either a primitive `Value` (Integer / Real / Text / Bool / Null)
    /// or a Text value carrying the JSON-encoded sub-object/array.
    /// Q3 originally specified `bincoded serde_json::Value`, but bincode
    /// was removed from the engine in Phase 3c — see the scope-correction
    /// note in `docs/phase-7-plan.md` for the rationale on switching to
    /// text storage.
    Json,
    None,
    Invalid,
}

impl DataType {
    /// Constructs a `DataType` from the wire string the parser produces.
    ///
    /// Matching is case-insensitive. Accepted strings are `"integer"`,
    /// `"text"`, `"real"`, `"bool"`, `"json"`, `"none"` and `"vector(N)"`
    /// where `N` is a positive integer (surrounding whitespace inside the
    /// parentheses is ignored). The vector dimension is encoded in-band so
    /// the string-based `ParsedColumn` pipeline does not need a richer type.
    ///
    /// Anything else yields `DataType::Invalid` and writes a diagnostic to
    /// stderr; this function never fails or panics.
    pub fn new(cmd: String) -> DataType {
        let lower = cmd.to_lowercase();
        match lower.as_str() {
            "integer" => DataType::Integer,
            "text" => DataType::Text,
            "real" => DataType::Real,
            "bool" => DataType::Bool,
            "json" => DataType::Json,
            "none" => DataType::None,
            other => {
                // `vector(` … `)` is the only parameterised form. Peel the
                // wrapper off; `None` here means it is not a vector spelling
                // at all and falls through to the generic diagnostic.
                let dimension_text = other
                    .strip_prefix("vector(")
                    .and_then(|rest| rest.strip_suffix(')'));
                match dimension_text {
                    Some(inside) => match inside.trim().parse::<usize>() {
                        Ok(dim) if dim > 0 => DataType::Vector(dim),
                        // Empty, non-numeric, negative or zero dimension.
                        _ => {
                            eprintln!("Invalid VECTOR dimension in {cmd}");
                            DataType::Invalid
                        }
                    },
                    None => {
                        eprintln!("Invalid data type given {}", cmd);
                        DataType::Invalid
                    }
                }
            }
        }
    }

    /// Inverse of `new` — returns the canonical lowercased wire string
    /// for this `DataType`, e.g. `"integer"` or `"vector(384)"`.
    ///
    /// For every variant except `Invalid`, feeding the result back into
    /// `DataType::new` reproduces `self`. `Invalid` has no wire spelling;
    /// it is rendered as `"invalid"`, which `new` maps back to `Invalid`
    /// (with a stderr diagnostic).
    pub fn to_wire_string(&self) -> String {
        match self {
            DataType::Integer => "integer".to_string(),
            DataType::Text => "text".to_string(),
            DataType::Real => "real".to_string(),
            DataType::Bool => "bool".to_string(),
            DataType::Vector(dim) => format!("vector({dim})"),
            DataType::Json => "json".to_string(),
            DataType::None => "none".to_string(),
            DataType::Invalid => "invalid".to_string(),
        }
    }
}

/// Human-facing rendering of a `DataType`. This is distinct from
/// `to_wire_string`: the names are capitalised and `Bool` is spelled
/// `Boolean`.
impl fmt::Display for DataType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            DataType::Integer => f.write_str("Integer"),
            DataType::Text => f.write_str("Text"),
            DataType::Real => f.write_str("Real"),
            DataType::Bool => f.write_str("Boolean"),
            DataType::Vector(dim) => write!(f, "Vector({dim})"),
            DataType::Json => f.write_str("Json"),
            DataType::None => f.write_str("None"),
            DataType::Invalid => f.write_str("Invalid"),
        }
    }
}
112

113
/// The schema for each SQL Table is represented in memory by
114
/// following structure.
115
///
116
/// `rows` is `Arc<Mutex<...>>` rather than `Rc<RefCell<...>>` so `Table`
117
/// (and by extension `Database`) is `Send + Sync` — the Tauri desktop
118
/// app holds the engine in shared state behind a `Mutex<Database>`, and
119
/// Tauri's state container requires its contents to be thread-safe.
120
#[derive(Debug)]
121
pub struct Table {
122
    /// Name of the table
123
    pub tb_name: String,
124
    /// Schema for each column, in declaration order.
125
    pub columns: Vec<Column>,
126
    /// Per-column row storage, keyed by column name. Every column's
127
    /// `Row::T(BTreeMap)` is keyed by rowid, so all columns share the same
128
    /// keyset after each write.
129
    pub rows: Arc<Mutex<HashMap<String, Row>>>,
130
    /// Secondary indexes on this table (Phase 3e). One auto-created entry
131
    /// per UNIQUE or PRIMARY KEY column; explicit `CREATE INDEX` statements
132
    /// add more. Looking up an index: iterate by column name, or by index
133
    /// name via `Table::index_by_name`.
134
    pub secondary_indexes: Vec<SecondaryIndex>,
135
    /// HNSW indexes on VECTOR columns (Phase 7d.2). Maintained in lockstep
136
    /// with row storage on INSERT (incremental); rebuilt on open from the
137
    /// persisted CREATE INDEX SQL. The graph itself is NOT yet persisted —
138
    /// see Phase 7d.3 for cell-encoded graph storage.
139
    pub hnsw_indexes: Vec<HnswIndexEntry>,
140
    /// ROWID of most recent insert.
141
    pub last_rowid: i64,
142
    /// PRIMARY KEY column name, or "-1" if the table has no PRIMARY KEY.
143
    pub primary_key: String,
144
}
145

146
/// One HNSW index attached to a table. Phase 7d.2 only supports L2
147
/// distance; cosine and dot are 7d.x follow-ups (would require either
148
/// distinct USING methods like `hnsw_cosine` or a `WITH (metric = …)`
149
/// clause — see `docs/phase-7-plan.md` for the deferred decision).
150
#[derive(Debug, Clone)]
151
pub struct HnswIndexEntry {
152
    /// User-supplied name from `CREATE INDEX <name> …`. Unique across
153
    /// both `secondary_indexes` and `hnsw_indexes` on a given table.
154
    pub name: String,
155
    /// The VECTOR column this index covers.
156
    pub column_name: String,
157
    /// The graph itself.
158
    pub index: HnswIndex,
159
    /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
160
    /// invalidated the graph since the last rebuild. INSERT maintains
161
    /// the graph incrementally and leaves this false. The next save
162
    /// rebuilds dirty indexes from current rows before serializing.
163
    pub needs_rebuild: bool,
164
}
165

166
impl Table {
167
    pub fn new(create_query: CreateQuery) -> Self {
2✔
168
        let table_name = create_query.table_name;
2✔
169
        let mut primary_key: String = String::from("-1");
2✔
170
        let columns = create_query.columns;
2✔
171

172
        let mut table_cols: Vec<Column> = vec![];
2✔
173
        let table_rows: Arc<Mutex<HashMap<String, Row>>> = Arc::new(Mutex::new(HashMap::new()));
4✔
174
        let mut secondary_indexes: Vec<SecondaryIndex> = Vec::new();
2✔
175
        for col in &columns {
6✔
176
            let col_name = &col.name;
2✔
177
            if col.is_pk {
4✔
178
                primary_key = col_name.to_string();
2✔
179
            }
180
            table_cols.push(Column::new(
4✔
181
                col_name.to_string(),
4✔
182
                col.datatype.to_string(),
2✔
183
                col.is_pk,
2✔
184
                col.not_null,
2✔
185
                col.is_unique,
2✔
186
            ));
187

188
            let dt = DataType::new(col.datatype.to_string());
2✔
189
            let row_storage = match &dt {
2✔
190
                DataType::Integer => Row::Integer(BTreeMap::new()),
4✔
191
                DataType::Real => Row::Real(BTreeMap::new()),
2✔
192
                DataType::Text => Row::Text(BTreeMap::new()),
4✔
193
                DataType::Bool => Row::Bool(BTreeMap::new()),
2✔
194
                // The dimension is enforced at INSERT time against the
195
                // column's declared DataType::Vector(dim). The Row variant
196
                // itself doesn't carry the dim — every stored Vec<f32>
197
                // already has it via .len().
198
                DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
2✔
199
                // Phase 7e — JSON columns reuse Text storage (with
200
                // INSERT-time validation that the bytes parse as JSON).
201
                // No new Row variant; json_extract / json_type / etc.
202
                // re-parse from text on demand. See `docs/phase-7-plan.md`
203
                // Q3's scope-correction note for the storage choice.
204
                DataType::Json => Row::Text(BTreeMap::new()),
2✔
205
                DataType::Invalid | DataType::None => Row::None,
1✔
206
            };
207
            table_rows
4✔
208
                .lock()
209
                .expect("Table row storage mutex poisoned")
210
                .insert(col.name.to_string(), row_storage);
×
211

212
            // Auto-create an index for every UNIQUE / PRIMARY KEY column,
213
            // but only for types we know how to index. Real / Bool / Vector
214
            // UNIQUE columns fall back to the linear scan path in
215
            // validate_unique_constraint — same behavior as before 3e.
216
            // (Vector UNIQUE is unusual; the linear-scan path will work
217
            // via Value::Vector PartialEq, just at O(N) cost.)
218
            if (col.is_pk || col.is_unique) && matches!(dt, DataType::Integer | DataType::Text) {
2✔
219
                let name = SecondaryIndex::auto_name(&table_name, &col.name);
2✔
220
                match SecondaryIndex::new(
4✔
221
                    name,
2✔
222
                    table_name.clone(),
4✔
223
                    col.name.clone(),
2✔
224
                    &dt,
×
225
                    true,
×
226
                    IndexOrigin::Auto,
×
227
                ) {
228
                    Ok(idx) => secondary_indexes.push(idx),
4✔
229
                    Err(_) => {
×
230
                        // Unreachable given the matches! guard above, but
231
                        // the builder returns Result so we keep the arm.
232
                    }
233
                }
234
            }
235
        }
236

237
        Table {
238
            tb_name: table_name,
239
            columns: table_cols,
240
            rows: table_rows,
241
            secondary_indexes,
242
            // HNSW indexes only land via explicit CREATE INDEX … USING hnsw
243
            // statements (Phase 7d.2); never auto-created at CREATE TABLE
244
            // time, because there's no UNIQUE-style constraint that
245
            // implies a vector index.
246
            hnsw_indexes: Vec::new(),
2✔
247
            last_rowid: 0,
248
            primary_key,
249
        }
250
    }
251

252
    /// Deep-clones a `Table` for transaction snapshots (Phase 4f).
253
    ///
254
    /// The normal `Clone` derive would shallow-clone the `Arc<Mutex<_>>`
255
    /// wrapping our row storage, leaving both copies sharing the same
256
    /// inner map — mutating the snapshot would corrupt the live table
257
    /// and vice versa. Instead we lock, clone the inner `HashMap`, and
258
    /// wrap it in a fresh `Arc<Mutex<_>>`. Columns and indexes derive
259
    /// `Clone` directly (all their fields are plain data).
260
    pub fn deep_clone(&self) -> Self {
1✔
261
        let cloned_rows: HashMap<String, Row> = {
1✔
262
            let guard = self.rows.lock().expect("row mutex poisoned");
1✔
263
            guard.clone()
2✔
264
        };
265
        Table {
266
            tb_name: self.tb_name.clone(),
1✔
267
            columns: self.columns.clone(),
1✔
268
            rows: Arc::new(Mutex::new(cloned_rows)),
2✔
269
            secondary_indexes: self.secondary_indexes.clone(),
1✔
270
            // HnswIndexEntry derives Clone, so the snapshot owns its own
271
            // graph copy. Phase 4f's snapshot-rollback semantics require
272
            // the snapshot to be fully decoupled from live state.
273
            hnsw_indexes: self.hnsw_indexes.clone(),
1✔
274
            last_rowid: self.last_rowid,
1✔
275
            primary_key: self.primary_key.clone(),
1✔
276
        }
277
    }
278

279
    /// Finds an auto- or explicit-index entry for a given column. Returns
280
    /// `None` if the column isn't indexed.
281
    pub fn index_for_column(&self, column: &str) -> Option<&SecondaryIndex> {
1✔
282
        self.secondary_indexes
1✔
283
            .iter()
284
            .find(|i| i.column_name == column)
3✔
285
    }
286

287
    /// Mutable counterpart of `index_for_column`: returns the first
    /// secondary index covering `column`, or `None` if there is none.
    fn index_for_column_mut(&mut self, column: &str) -> Option<&mut SecondaryIndex> {
        for index in self.secondary_indexes.iter_mut() {
            if index.column_name == column {
                return Some(index);
            }
        }
        None
    }
292

293
    /// Finds a secondary index by its own name (e.g., `sqlrite_autoindex_users_email`
294
    /// or a user-provided CREATE INDEX name). Used by Phase 3e.2 to look up
295
    /// explicit indexes when DROP INDEX lands.
296
    #[allow(dead_code)]
297
    pub fn index_by_name(&self, name: &str) -> Option<&SecondaryIndex> {
1✔
298
        self.secondary_indexes.iter().find(|i| i.name == name)
3✔
299
    }
300

301
    /// Returns a `bool` informing if a `Column` with a specific name exists or not
302
    ///
303
    pub fn contains_column(&self, column: String) -> bool {
2✔
304
        self.columns.iter().any(|col| col.column_name == column)
8✔
305
    }
306

307
    /// Returns the list of column names in declaration order.
308
    pub fn column_names(&self) -> Vec<String> {
1✔
309
        self.columns.iter().map(|c| c.column_name.clone()).collect()
3✔
310
    }
311

312
    /// Returns all rowids currently stored in the table, in ascending order.
313
    /// Every column's BTreeMap has the same keyset, so we just read from the first column.
314
    pub fn rowids(&self) -> Vec<i64> {
2✔
315
        let Some(first) = self.columns.first() else {
2✔
316
            return vec![];
×
317
        };
318
        let rows = self.rows.lock().expect("rows mutex poisoned");
2✔
319
        rows.get(&first.column_name)
4✔
320
            .map(|r| r.rowids())
6✔
321
            .unwrap_or_default()
322
    }
323

324
    /// Reads a single cell at `(column, rowid)`.
325
    pub fn get_value(&self, column: &str, rowid: i64) -> Option<Value> {
2✔
326
        let rows = self.rows.lock().expect("rows mutex poisoned");
2✔
327
        rows.get(column).and_then(|r| r.get(rowid))
8✔
328
    }
329

330
    /// Removes the row identified by `rowid` from every column's storage and
331
    /// from every secondary index entry.
332
    pub fn delete_row(&mut self, rowid: i64) {
1✔
333
        // Snapshot the values we're about to delete so we can strip them
334
        // from secondary indexes by (value, rowid) before the row storage
335
        // disappears.
336
        let per_column_values: Vec<(String, Option<Value>)> = self
2✔
337
            .columns
×
338
            .iter()
339
            .map(|c| (c.column_name.clone(), self.get_value(&c.column_name, rowid)))
3✔
340
            .collect();
341

342
        // Remove from row storage.
343
        {
344
            let rows_clone = Arc::clone(&self.rows);
2✔
345
            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
2✔
346
            for col in &self.columns {
2✔
347
                if let Some(r) = row_data.get_mut(&col.column_name) {
2✔
348
                    match r {
1✔
349
                        Row::Integer(m) => {
1✔
350
                            m.remove(&rowid);
2✔
351
                        }
352
                        Row::Text(m) => {
1✔
353
                            m.remove(&rowid);
2✔
354
                        }
355
                        Row::Real(m) => {
×
356
                            m.remove(&rowid);
×
357
                        }
358
                        Row::Bool(m) => {
×
359
                            m.remove(&rowid);
×
360
                        }
361
                        Row::Vector(m) => {
1✔
362
                            m.remove(&rowid);
2✔
363
                        }
364
                        Row::None => {}
×
365
                    }
366
                }
367
            }
368
        }
369

370
        // Strip secondary-index entries. Non-indexed columns just don't
371
        // show up in secondary_indexes and are no-ops here.
372
        for (col_name, value) in per_column_values {
2✔
373
            if let Some(idx) = self.index_for_column_mut(&col_name) {
2✔
374
                if let Some(v) = value {
2✔
375
                    idx.remove(&v, rowid);
1✔
376
                }
377
            }
378
        }
379
    }
380

381
    /// Replays a single row at `rowid` when loading a table from disk. Takes
382
    /// one typed value per column (in declaration order); `None` means the
383
    /// stored cell carried a NULL for that column. Unlike `insert_row` this
384
    /// trusts the on-disk state and does *not* re-check UNIQUE — we're
385
    /// rebuilding a state that was already consistent when it was saved.
386
    pub fn restore_row(&mut self, rowid: i64, values: Vec<Option<Value>>) -> Result<()> {
2✔
387
        if values.len() != self.columns.len() {
4✔
388
            return Err(SQLRiteError::Internal(format!(
×
389
                "cell has {} values but table '{}' has {} columns",
×
390
                values.len(),
×
391
                self.tb_name,
×
392
                self.columns.len()
×
393
            )));
394
        }
395

396
        let column_names: Vec<String> =
8✔
397
            self.columns.iter().map(|c| c.column_name.clone()).collect();
×
398

399
        for (i, value) in values.into_iter().enumerate() {
6✔
400
            let col_name = &column_names[i];
4✔
401

402
            // Write into the per-column row storage first (scoped borrow so
403
            // the secondary-index update below doesn't fight over `self`).
404
            {
405
                let rows_clone = Arc::clone(&self.rows);
2✔
406
                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
4✔
407
                let cell = row_data.get_mut(col_name).ok_or_else(|| {
4✔
408
                    SQLRiteError::Internal(format!("Row storage missing for column '{col_name}'"))
×
409
                })?;
410

411
                match (cell, &value) {
7✔
412
                    (Row::Integer(map), Some(Value::Integer(v))) => {
2✔
413
                        map.insert(rowid, *v as i32);
2✔
414
                    }
415
                    (Row::Integer(_), None) => {
×
416
                        return Err(SQLRiteError::Internal(format!(
×
417
                            "Integer column '{col_name}' cannot store NULL — corrupt cell?"
×
418
                        )));
419
                    }
420
                    (Row::Text(map), Some(Value::Text(s))) => {
2✔
421
                        map.insert(rowid, s.clone());
2✔
422
                    }
423
                    (Row::Text(map), None) => {
×
424
                        // Matches the on-insert convention: NULL in Text
425
                        // storage is represented by the literal "Null"
426
                        // sentinel and not added to the index.
427
                        map.insert(rowid, "Null".to_string());
×
428
                    }
429
                    (Row::Real(map), Some(Value::Real(v))) => {
×
430
                        map.insert(rowid, *v as f32);
×
431
                    }
432
                    (Row::Real(_), None) => {
×
433
                        return Err(SQLRiteError::Internal(format!(
×
434
                            "Real column '{col_name}' cannot store NULL — corrupt cell?"
×
435
                        )));
436
                    }
437
                    (Row::Bool(map), Some(Value::Bool(v))) => {
×
438
                        map.insert(rowid, *v);
×
439
                    }
440
                    (Row::Bool(_), None) => {
×
441
                        return Err(SQLRiteError::Internal(format!(
×
442
                            "Bool column '{col_name}' cannot store NULL — corrupt cell?"
×
443
                        )));
444
                    }
445
                    (Row::Vector(map), Some(Value::Vector(v))) => {
1✔
446
                        map.insert(rowid, v.clone());
1✔
447
                    }
448
                    (Row::Vector(_), None) => {
×
449
                        return Err(SQLRiteError::Internal(format!(
×
450
                            "Vector column '{col_name}' cannot store NULL — corrupt cell?"
×
451
                        )));
452
                    }
453
                    (row, v) => {
×
454
                        return Err(SQLRiteError::Internal(format!(
×
455
                            "Type mismatch restoring column '{col_name}': storage {row:?} vs value {v:?}"
×
456
                        )));
457
                    }
458
                }
459
            }
460

461
            // Maintain the secondary index (if any). NULL values are skipped
462
            // by `insert`, matching the "NULL is not indexed" convention.
463
            if let Some(v) = &value {
2✔
464
                if let Some(idx) = self.index_for_column_mut(col_name) {
4✔
465
                    idx.insert(v, rowid)?;
2✔
466
                }
467
            }
468
        }
469

470
        if rowid > self.last_rowid {
4✔
471
            self.last_rowid = rowid;
2✔
472
        }
473
        Ok(())
2✔
474
    }
475

476
    /// Extracts a row as an ordered `Vec<Option<Value>>` matching the column
477
    /// declaration order. Returns `None` entries for columns that hold NULL.
478
    /// Used by `save_database` to turn a table's in-memory state into cells.
479
    pub fn extract_row(&self, rowid: i64) -> Vec<Option<Value>> {
2✔
480
        self.columns
2✔
481
            .iter()
482
            .map(|c| match self.get_value(&c.column_name, rowid) {
6✔
483
                Some(Value::Null) => None,
×
484
                Some(v) => Some(v),
2✔
485
                None => None,
×
486
            })
487
            .collect()
488
    }
489

490
    /// Overwrites the cell at `(column, rowid)` with `new_val`. Enforces the
491
    /// column's datatype and UNIQUE constraint, and updates any secondary
492
    /// index.
493
    ///
494
    /// Returns `Err` if the column doesn't exist, the value type is incompatible,
495
    /// or writing would violate UNIQUE.
496
    pub fn set_value(&mut self, column: &str, rowid: i64, new_val: Value) -> Result<()> {
1✔
497
        let col_index = self
3✔
498
            .columns
×
499
            .iter()
1✔
500
            .position(|c| c.column_name == column)
3✔
501
            .ok_or_else(|| SQLRiteError::General(format!("Column '{column}' not found")))?;
1✔
502

503
        // No-op write — keep storage exactly the same.
504
        let current = self.get_value(column, rowid);
1✔
505
        if current.as_ref() == Some(&new_val) {
2✔
506
            return Ok(());
×
507
        }
508

509
        // Enforce UNIQUE. Prefer an O(log N) index probe if we have one;
510
        // fall back to a full column scan otherwise (Real/Bool UNIQUE
511
        // columns, which don't get auto-indexed).
512
        if self.columns[col_index].is_unique && !matches!(new_val, Value::Null) {
3✔
513
            if let Some(idx) = self.index_for_column(column) {
1✔
514
                for other in idx.lookup(&new_val) {
3✔
515
                    if other != rowid {
1✔
516
                        return Err(SQLRiteError::General(format!(
1✔
517
                            "UNIQUE constraint violated for column '{column}'"
×
518
                        )));
519
                    }
520
                }
521
            } else {
522
                for other in self.rowids() {
×
523
                    if other == rowid {
×
524
                        continue;
×
525
                    }
526
                    if self.get_value(column, other).as_ref() == Some(&new_val) {
×
527
                        return Err(SQLRiteError::General(format!(
×
528
                            "UNIQUE constraint violated for column '{column}'"
×
529
                        )));
530
                    }
531
                }
532
            }
533
        }
534

535
        // Drop the old index entry before writing the new value, so the
536
        // post-write index insert doesn't clash with the previous state.
537
        if let Some(old) = current {
2✔
538
            if let Some(idx) = self.index_for_column_mut(column) {
2✔
539
                idx.remove(&old, rowid);
×
540
            }
541
        }
542

543
        // Write into the column's Row, type-checking against the declared DataType.
544
        let declared = &self.columns[col_index].datatype;
2✔
545
        {
546
            let rows_clone = Arc::clone(&self.rows);
1✔
547
            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
2✔
548
            let cell = row_data.get_mut(column).ok_or_else(|| {
2✔
549
                SQLRiteError::Internal(format!("Row storage missing for column '{column}'"))
×
550
            })?;
551

552
            match (cell, &new_val, declared) {
2✔
553
                (Row::Integer(m), Value::Integer(v), _) => {
1✔
554
                    m.insert(rowid, *v as i32);
1✔
555
                }
556
                (Row::Real(m), Value::Real(v), _) => {
×
557
                    m.insert(rowid, *v as f32);
×
558
                }
559
                (Row::Real(m), Value::Integer(v), _) => {
×
560
                    m.insert(rowid, *v as f32);
×
561
                }
562
                (Row::Text(m), Value::Text(v), dt) => {
1✔
563
                    // Phase 7e — UPDATE on a JSON column also validates
564
                    // the new text is well-formed JSON, mirroring INSERT.
565
                    if matches!(dt, DataType::Json) {
1✔
566
                        if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
1✔
567
                            return Err(SQLRiteError::General(format!(
2✔
568
                                "Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
×
569
                            )));
570
                        }
571
                    }
572
                    m.insert(rowid, v.clone());
2✔
573
                }
574
                (Row::Bool(m), Value::Bool(v), _) => {
×
575
                    m.insert(rowid, *v);
×
576
                }
577
                (Row::Vector(m), Value::Vector(v), DataType::Vector(declared_dim)) => {
1✔
578
                    if v.len() != *declared_dim {
1✔
579
                        return Err(SQLRiteError::General(format!(
×
580
                            "Vector dimension mismatch for column '{column}': declared {declared_dim}, got {}",
×
581
                            v.len()
×
582
                        )));
583
                    }
584
                    m.insert(rowid, v.clone());
2✔
585
                }
586
                // NULL writes: store the sentinel "Null" string for Text; for other
587
                // types we leave storage as-is since those BTreeMaps can't hold NULL today.
588
                (Row::Text(m), Value::Null, _) => {
×
589
                    m.insert(rowid, "Null".to_string());
×
590
                }
591
                (_, new, dt) => {
×
592
                    return Err(SQLRiteError::General(format!(
×
593
                        "Type mismatch: cannot assign {} to column '{column}' of type {dt}",
×
594
                        new.to_display_string()
×
595
                    )));
596
                }
597
            }
598
        }
599

600
        // Maintain the secondary index, if any. NULL values are skipped by
601
        // insert per convention.
602
        if !matches!(new_val, Value::Null) {
1✔
603
            if let Some(idx) = self.index_for_column_mut(column) {
2✔
604
                idx.insert(&new_val, rowid)?;
×
605
            }
606
        }
607

608
        Ok(())
1✔
609
    }
610

611
    /// Returns an immutable reference of `sql::db::table::Column` if the table contains a
612
    /// column with the specified key as a column name.
613
    ///
614
    #[allow(dead_code)]
615
    pub fn get_column(&mut self, column_name: String) -> Result<&Column> {
×
616
        if let Some(column) = self
×
617
            .columns
×
618
            .iter()
619
            .filter(|c| c.column_name == column_name)
×
620
            .collect::<Vec<&Column>>()
621
            .first()
622
        {
623
            Ok(column)
×
624
        } else {
625
            Err(SQLRiteError::General(String::from("Column not found.")))
×
626
        }
627
    }
628

629
    /// Validates if columns and values being inserted violate the UNIQUE constraint.
630
    /// PRIMARY KEY columns are automatically UNIQUE. Uses the corresponding
631
    /// secondary index when one exists (O(log N) lookup); falls back to a
632
    /// linear scan for indexable-but-not-indexed situations (e.g. a Real
633
    /// UNIQUE column — Real isn't in the auto-indexed set).
634
    pub fn validate_unique_constraint(
2✔
635
        &mut self,
636
        cols: &Vec<String>,
637
        values: &Vec<String>,
638
    ) -> Result<()> {
639
        for (idx, name) in cols.iter().enumerate() {
4✔
640
            let column = self
4✔
641
                .columns
×
642
                .iter()
2✔
643
                .find(|c| &c.column_name == name)
6✔
644
                .ok_or_else(|| SQLRiteError::General(format!("Column '{name}' not found")))?;
2✔
645
            if !column.is_unique {
2✔
646
                continue;
×
647
            }
648
            let datatype = &column.datatype;
1✔
649
            let val = &values[idx];
1✔
650

651
            // Parse the string value into a runtime Value according to the
652
            // declared column type. If parsing fails the caller's insert
653
            // would also fail with the same error; surface it here so we
654
            // don't emit a misleading "unique OK" on bad input.
655
            let parsed = match datatype {
1✔
656
                DataType::Integer => val.parse::<i64>().map(Value::Integer).map_err(|_| {
1✔
657
                    SQLRiteError::General(format!(
×
658
                        "Type mismatch: expected INTEGER for column '{name}', got '{val}'"
×
659
                    ))
660
                })?,
661
                DataType::Text => Value::Text(val.clone()),
1✔
662
                DataType::Real => val.parse::<f64>().map(Value::Real).map_err(|_| {
×
663
                    SQLRiteError::General(format!(
×
664
                        "Type mismatch: expected REAL for column '{name}', got '{val}'"
×
665
                    ))
666
                })?,
667
                DataType::Bool => val.parse::<bool>().map(Value::Bool).map_err(|_| {
×
668
                    SQLRiteError::General(format!(
×
669
                        "Type mismatch: expected BOOL for column '{name}', got '{val}'"
×
670
                    ))
671
                })?,
672
                DataType::Vector(declared_dim) => {
×
673
                    let parsed_vec = parse_vector_literal(val).map_err(|e| {
×
674
                        SQLRiteError::General(format!(
×
675
                            "Type mismatch: expected VECTOR({declared_dim}) for column '{name}', {e}"
×
676
                        ))
677
                    })?;
678
                    if parsed_vec.len() != *declared_dim {
×
679
                        return Err(SQLRiteError::General(format!(
×
680
                            "Vector dimension mismatch for column '{name}': declared {declared_dim}, got {}",
×
681
                            parsed_vec.len()
×
682
                        )));
683
                    }
684
                    Value::Vector(parsed_vec)
×
685
                }
686
                DataType::Json => {
×
687
                    // JSON values stored as Text. UNIQUE on a JSON column
688
                    // compares the canonical text representation
689
                    // verbatim — `{"a": 1}` and `{"a":1}` are distinct.
690
                    // Document this if anyone actually requests UNIQUE
691
                    // JSON; for MVP, treat-as-text is fine.
692
                    Value::Text(val.clone())
×
693
                }
694
                DataType::None | DataType::Invalid => {
×
695
                    return Err(SQLRiteError::Internal(format!(
×
696
                        "column '{name}' has an unsupported datatype"
×
697
                    )));
698
                }
699
            };
700

701
            if let Some(secondary) = self.index_for_column(name) {
2✔
702
                if secondary.would_violate_unique(&parsed) {
2✔
703
                    return Err(SQLRiteError::General(format!(
×
704
                        "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
×
705
                    )));
706
                }
707
            } else {
708
                // No secondary index (Real / Bool UNIQUE). Linear scan.
709
                for other in self.rowids() {
×
710
                    if self.get_value(name, other).as_ref() == Some(&parsed) {
×
711
                        return Err(SQLRiteError::General(format!(
×
712
                            "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
×
713
                        )));
714
                    }
715
                }
716
            }
717
        }
718
        Ok(())
2✔
719
    }
720

721
    /// Inserts all VALUES in its approprieta COLUMNS, using the ROWID an embedded INDEX on all ROWS
722
    /// Every `Table` keeps track of the `last_rowid` in order to facilitate what the next one would be.
723
    /// One limitation of this data structure is that we can only have one write transaction at a time, otherwise
724
    /// we could have a race condition on the last_rowid.
725
    ///
726
    /// Since we are loosely modeling after SQLite, this is also a limitation of SQLite (allowing only one write transcation at a time),
727
    /// So we are good. :)
728
    ///
729
    /// Returns `Err` (leaving the table unchanged) when the user supplies an
730
    /// incompatibly-typed value — no more panics on bad input.
731
    pub fn insert_row(&mut self, cols: &Vec<String>, values: &Vec<String>) -> Result<()> {
2✔
732
        let mut next_rowid = self.last_rowid + 1;
2✔
733

734
        // Auto-assign INTEGER PRIMARY KEY when the user omits it; otherwise
735
        // adopt the supplied value as the new rowid.
736
        if self.primary_key != "-1" {
2✔
737
            if !cols.iter().any(|col| col == &self.primary_key) {
6✔
738
                // Write the auto-assigned PK into row storage, then sync
739
                // the secondary index.
740
                let val = next_rowid as i32;
2✔
741
                let wrote_integer = {
×
742
                    let rows_clone = Arc::clone(&self.rows);
2✔
743
                    let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
4✔
744
                    let table_col_data = row_data.get_mut(&self.primary_key).ok_or_else(|| {
4✔
745
                        SQLRiteError::Internal(format!(
×
746
                            "Row storage missing for primary key column '{}'",
×
747
                            self.primary_key
×
748
                        ))
749
                    })?;
750
                    match table_col_data {
2✔
751
                        Row::Integer(tree) => {
2✔
752
                            tree.insert(next_rowid, val);
2✔
753
                            true
2✔
754
                        }
755
                        _ => false, // non-integer PK: auto-assign is a no-op
×
756
                    }
757
                };
758
                if wrote_integer {
2✔
759
                    let pk = self.primary_key.clone();
2✔
760
                    if let Some(idx) = self.index_for_column_mut(&pk) {
4✔
761
                        idx.insert(&Value::Integer(val as i64), next_rowid)?;
2✔
762
                    }
763
                }
764
            } else {
765
                for i in 0..cols.len() {
2✔
766
                    if cols[i] == self.primary_key {
2✔
767
                        let val = &values[i];
1✔
768
                        next_rowid = val.parse::<i64>().map_err(|_| {
1✔
769
                            SQLRiteError::General(format!(
×
770
                                "Type mismatch: PRIMARY KEY column '{}' expects INTEGER, got '{val}'",
×
771
                                self.primary_key
×
772
                            ))
773
                        })?;
774
                    }
775
                }
776
            }
777
        }
778

779
        // For every table column, either pick the supplied value or pad with NULL
780
        // so that every column's BTreeMap keeps the same rowid keyset.
781
        let column_names = self
2✔
782
            .columns
×
783
            .iter()
784
            .map(|col| col.column_name.to_string())
6✔
785
            .collect::<Vec<String>>();
786
        let mut j: usize = 0;
2✔
787
        for i in 0..column_names.len() {
4✔
788
            let mut val = String::from("Null");
2✔
789
            let key = &column_names[i];
4✔
790

791
            if let Some(supplied_key) = cols.get(j) {
2✔
792
                if supplied_key == &column_names[i] {
6✔
793
                    val = values[j].to_string();
4✔
794
                    j += 1;
2✔
795
                } else if self.primary_key == column_names[i] {
4✔
796
                    // PK already stored in the auto-assign branch above.
797
                    continue;
×
798
                }
799
            } else if self.primary_key == column_names[i] {
2✔
800
                continue;
×
801
            }
802

803
            // Step 1: write into row storage and compute the typed Value
804
            // we'll hand to the secondary index (if any).
805
            let typed_value: Option<Value> = {
×
806
                let rows_clone = Arc::clone(&self.rows);
4✔
807
                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
4✔
808
                let table_col_data = row_data.get_mut(key).ok_or_else(|| {
4✔
809
                    SQLRiteError::Internal(format!("Row storage missing for column '{key}'"))
×
810
                })?;
811

812
                match table_col_data {
2✔
813
                    Row::Integer(tree) => {
1✔
814
                        let parsed = val.parse::<i32>().map_err(|_| {
5✔
815
                            SQLRiteError::General(format!(
1✔
816
                                "Type mismatch: expected INTEGER for column '{key}', got '{val}'"
×
817
                            ))
818
                        })?;
819
                        tree.insert(next_rowid, parsed);
1✔
820
                        Some(Value::Integer(parsed as i64))
1✔
821
                    }
822
                    Row::Text(tree) => {
2✔
823
                        // Phase 7e — JSON columns also reach here (they
824
                        // share Row::Text storage with TEXT columns).
825
                        // Validate the value parses as JSON before
826
                        // storing; otherwise we'd happily write
827
                        // `not-json-at-all` and only fail when
828
                        // json_extract tried to parse it later.
829
                        if matches!(self.columns[i].datatype, DataType::Json) && val != "Null" {
5✔
830
                            if let Err(e) = serde_json::from_str::<serde_json::Value>(&val) {
1✔
831
                                return Err(SQLRiteError::General(format!(
2✔
832
                                    "Type mismatch: expected JSON for column '{key}', got '{val}': {e}"
×
833
                                )));
834
                            }
835
                        }
836
                        tree.insert(next_rowid, val.to_string());
4✔
837
                        // "Null" sentinel stays out of the index — it isn't a
838
                        // real user value.
839
                        if val != "Null" {
5✔
840
                            Some(Value::Text(val.to_string()))
2✔
841
                        } else {
842
                            None
1✔
843
                        }
844
                    }
845
                    Row::Real(tree) => {
1✔
846
                        let parsed = val.parse::<f32>().map_err(|_| {
2✔
847
                            SQLRiteError::General(format!(
×
848
                                "Type mismatch: expected REAL for column '{key}', got '{val}'"
×
849
                            ))
850
                        })?;
851
                        tree.insert(next_rowid, parsed);
1✔
852
                        Some(Value::Real(parsed as f64))
1✔
853
                    }
854
                    Row::Bool(tree) => {
×
855
                        let parsed = val.parse::<bool>().map_err(|_| {
×
856
                            SQLRiteError::General(format!(
×
857
                                "Type mismatch: expected BOOL for column '{key}', got '{val}'"
×
858
                            ))
859
                        })?;
860
                        tree.insert(next_rowid, parsed);
×
861
                        Some(Value::Bool(parsed))
×
862
                    }
863
                    Row::Vector(tree) => {
1✔
864
                        // The parser put a bracket-array literal into `val`
865
                        // (e.g. "[0.1,0.2,0.3]"). Parse it back here and
866
                        // dim-check against the column's declared
867
                        // DataType::Vector(N).
868
                        let parsed = parse_vector_literal(&val).map_err(|e| {
2✔
869
                            SQLRiteError::General(format!(
×
870
                                "Type mismatch: expected VECTOR for column '{key}', {e}"
×
871
                            ))
872
                        })?;
873
                        let declared_dim = match &self.columns[i].datatype {
2✔
874
                            DataType::Vector(d) => *d,
1✔
875
                            other => {
×
876
                                return Err(SQLRiteError::Internal(format!(
×
877
                                    "Row::Vector storage on non-Vector column '{key}' (declared as {other})"
×
878
                                )));
879
                            }
880
                        };
881
                        if parsed.len() != declared_dim {
2✔
882
                            return Err(SQLRiteError::General(format!(
1✔
883
                                "Vector dimension mismatch for column '{key}': declared {declared_dim}, got {}",
×
884
                                parsed.len()
2✔
885
                            )));
886
                        }
887
                        tree.insert(next_rowid, parsed.clone());
2✔
888
                        Some(Value::Vector(parsed))
1✔
889
                    }
890
                    Row::None => {
×
891
                        return Err(SQLRiteError::Internal(format!(
×
892
                            "Column '{key}' has no row storage"
×
893
                        )));
894
                    }
895
                }
896
            };
897

898
            // Step 2: maintain the secondary index (if any). insert() is a
899
            // no-op for Value::Null and cheap for other value kinds.
900
            if let Some(v) = typed_value.clone() {
4✔
901
                if let Some(idx) = self.index_for_column_mut(key) {
4✔
902
                    idx.insert(&v, next_rowid)?;
2✔
903
                }
904
            }
905

906
            // Step 3 (Phase 7d.2): maintain any HNSW indexes on this column.
907
            // The HNSW algorithm needs access to other rows' vectors when
908
            // wiring up neighbor edges, so build a get_vec closure that
909
            // pulls from the table's row storage (which we *just* updated
910
            // with the new value).
911
            if let Some(Value::Vector(new_vec)) = typed_value {
5✔
912
                self.maintain_hnsw_on_insert(key, next_rowid, &new_vec);
2✔
913
            }
914
        }
915
        self.last_rowid = next_rowid;
2✔
916
        Ok(())
2✔
917
    }
918

919
    /// After a row insert, push the new (rowid, vector) into every HNSW
920
    /// index whose column matches `column`. Split out of `insert_row` so
921
    /// the borrowing dance — we need both `&self.rows` (read other
922
    /// vectors) and `&mut self.hnsw_indexes` (insert into the graph) —
923
    /// stays localized.
924
    fn maintain_hnsw_on_insert(&mut self, column: &str, rowid: i64, new_vec: &[f32]) {
1✔
925
        // Snapshot the current vector storage so the get_vec closure
926
        // doesn't fight with `&mut self.hnsw_indexes`. For a typical
927
        // HNSW insert we touch ef_construction × log(N) other vectors,
928
        // so the snapshot cost is small relative to the graph wiring.
929
        let mut vec_snapshot: HashMap<i64, Vec<f32>> = HashMap::new();
1✔
930
        {
931
            let row_data = self.rows.lock().expect("rows mutex poisoned");
2✔
932
            if let Some(Row::Vector(map)) = row_data.get(column) {
3✔
933
                for (id, v) in map.iter() {
1✔
934
                    vec_snapshot.insert(*id, v.clone());
1✔
935
                }
936
            }
937
        }
938
        // The new row was just written into row storage — make sure the
939
        // snapshot reflects it (it should, but defensive).
940
        vec_snapshot.insert(rowid, new_vec.to_vec());
1✔
941

942
        for entry in &mut self.hnsw_indexes {
1✔
943
            if entry.column_name == column {
2✔
944
                entry.index.insert(rowid, new_vec, |id| {
2✔
945
                    vec_snapshot.get(&id).cloned().unwrap_or_default()
1✔
946
                });
947
            }
948
        }
949
    }
950

951
    /// Print the table schema to standard output in a pretty formatted way.
952
    ///
953
    /// # Example
954
    ///
955
    /// ```text
956
    /// let table = Table::new(payload);
957
    /// table.print_table_schema();
958
    ///
959
    /// Prints to standard output:
960
    ///    +-------------+-----------+-------------+--------+----------+
961
    ///    | Column Name | Data Type | PRIMARY KEY | UNIQUE | NOT NULL |
962
    ///    +-------------+-----------+-------------+--------+----------+
963
    ///    | id          | Integer   | true        | true   | true     |
964
    ///    +-------------+-----------+-------------+--------+----------+
965
    ///    | name        | Text      | false       | true   | false    |
966
    ///    +-------------+-----------+-------------+--------+----------+
967
    ///    | email       | Text      | false       | false  | false    |
968
    ///    +-------------+-----------+-------------+--------+----------+
969
    /// ```
970
    ///
971
    pub fn print_table_schema(&self) -> Result<usize> {
2✔
972
        let mut table = PrintTable::new();
1✔
973
        table.add_row(row![
3✔
974
            "Column Name",
×
975
            "Data Type",
×
976
            "PRIMARY KEY",
×
977
            "UNIQUE",
×
978
            "NOT NULL"
×
979
        ]);
980

981
        for col in &self.columns {
1✔
982
            table.add_row(row![
7✔
983
                col.column_name,
1✔
984
                col.datatype,
×
985
                col.is_pk,
1✔
986
                col.is_unique,
1✔
987
                col.not_null
1✔
988
            ]);
989
        }
990

991
        table.printstd();
1✔
992
        Ok(table.len() * 2 + 1)
1✔
993
    }
994

995
    /// Print the table data to standard output in a pretty formatted way.
996
    ///
997
    /// # Example
998
    ///
999
    /// ```text
1000
    /// let db_table = db.get_table_mut(table_name.to_string()).unwrap();
1001
    /// db_table.print_table_data();
1002
    ///
1003
    /// Prints to standard output:
1004
    ///     +----+---------+------------------------+
1005
    ///     | id | name    | email                  |
1006
    ///     +----+---------+------------------------+
1007
    ///     | 1  | "Jack"  | "jack@mail.com"        |
1008
    ///     +----+---------+------------------------+
1009
    ///     | 10 | "Bob"   | "bob@main.com"         |
1010
    ///     +----+---------+------------------------+
1011
    ///     | 11 | "Bill"  | "bill@main.com"        |
1012
    ///     +----+---------+------------------------+
1013
    /// ```
1014
    ///
UNCOV
1015
    pub fn print_table_data(&self) {
×
UNCOV
1016
        let mut print_table = PrintTable::new();
×
1017

UNCOV
1018
        let column_names = self
×
1019
            .columns
×
1020
            .iter()
UNCOV
1021
            .map(|col| col.column_name.to_string())
×
1022
            .collect::<Vec<String>>();
1023

1024
        let header_row = PrintRow::new(
UNCOV
1025
            column_names
×
UNCOV
1026
                .iter()
×
UNCOV
1027
                .map(|col| PrintCell::new(col))
×
UNCOV
1028
                .collect::<Vec<PrintCell>>(),
×
1029
        );
1030

UNCOV
1031
        let rows_clone = Arc::clone(&self.rows);
×
UNCOV
1032
        let row_data = rows_clone.lock().expect("rows mutex poisoned");
×
UNCOV
1033
        let first_col_data = row_data
×
UNCOV
1034
            .get(&self.columns.first().unwrap().column_name)
×
1035
            .unwrap();
UNCOV
1036
        let num_rows = first_col_data.count();
×
UNCOV
1037
        let mut print_table_rows: Vec<PrintRow> = vec![PrintRow::new(vec![]); num_rows];
×
1038

UNCOV
1039
        for col_name in &column_names {
×
UNCOV
1040
            let col_val = row_data
×
UNCOV
1041
                .get(col_name)
×
1042
                .expect("Can't find any rows with the given column");
UNCOV
1043
            let columns: Vec<String> = col_val.get_serialized_col_data();
×
1044

UNCOV
1045
            for i in 0..num_rows {
×
UNCOV
1046
                if let Some(cell) = &columns.get(i) {
×
UNCOV
1047
                    print_table_rows[i].add_cell(PrintCell::new(cell));
×
1048
                } else {
1049
                    print_table_rows[i].add_cell(PrintCell::new(""));
×
1050
                }
1051
            }
1052
        }
1053

UNCOV
1054
        print_table.add_row(header_row);
×
UNCOV
1055
        for row in print_table_rows {
×
UNCOV
1056
            print_table.add_row(row);
×
1057
        }
1058

UNCOV
1059
        print_table.printstd();
×
1060
    }
1061
}
1062

1063
/// The declared schema of one SQL column of a table.
1064
///
1065
/// Per-column index state moved to `Table::secondary_indexes` in Phase 3e —
1066
/// a single `Column` describes the declared schema (name, type, constraints)
1067
/// and nothing more.
1068
#[derive(PartialEq, Debug, Clone)]
1069
pub struct Column {
1070
    /// Name of the column; row storage and index lookups are keyed by it.
    pub column_name: String,
1071
    /// Declared data type of the column.
    pub datatype: DataType,
1072
    /// PRIMARY KEY flag (the "PRIMARY KEY" column of the schema printout).
    pub is_pk: bool,
1073
    /// NOT NULL flag (the "NOT NULL" column of the schema printout).
    pub not_null: bool,
1074
    /// UNIQUE flag; `validate_unique_constraint` only inspects columns that
    /// have it set.
    pub is_unique: bool,
1075
}
1076

1077
impl Column {
1078
    pub fn new(
2✔
1079
        name: String,
1080
        datatype: String,
1081
        is_pk: bool,
1082
        not_null: bool,
1083
        is_unique: bool,
1084
    ) -> Self {
1085
        let dt = DataType::new(datatype);
4✔
1086
        Column {
1087
            column_name: name,
1088
            datatype: dt,
1089
            is_pk,
1090
            not_null,
1091
            is_unique,
1092
        }
1093
    }
1094
}
1095

1096
/// In-memory storage for the values of one table column.
1097
/// Despite its name, a `Row` does not hold one SQL row: the table keeps one
1098
/// `Row` per column name.
1099
/// This is an enum with one variant per available storage type; each variant
1100
/// wraps a BTreeMap that uses the ROWID as key and the column's value as value.
1101
#[derive(PartialEq, Debug, Clone)]
1102
pub enum Row {
1103
    /// Integer column; values are stored as `i32`.
    Integer(BTreeMap<i64, i32>),
1104
    /// Text column. JSON columns share this storage, and columns omitted
    /// from an INSERT hold the literal string `"Null"`, which `Row::get`
    /// maps back to `Value::Null`.
    Text(BTreeMap<i64, String>),
1105
    /// Real column; values are stored as `f32`.
    Real(BTreeMap<i64, f32>),
1106
    /// Boolean column.
    Bool(BTreeMap<i64, bool>),
1107
    /// Phase 7a: dense f32 vector storage. Each `Vec<f32>` should have
1108
    /// length matching the column's declared `DataType::Vector(dim)`,
1109
    /// enforced at INSERT time. The Row variant doesn't carry the dim —
1110
    /// it lives in the column metadata.
1111
    Vector(BTreeMap<i64, Vec<f32>>),
1112
    /// No storage at all. `insert_row` reports an internal error for it and
    /// the print helpers panic on it.
    None,
1113
}
1114

1115
impl Row {
UNCOV
1116
    fn get_serialized_col_data(&self) -> Vec<String> {
×
UNCOV
1117
        match self {
×
UNCOV
1118
            Row::Integer(cd) => cd.values().map(|v| v.to_string()).collect(),
×
UNCOV
1119
            Row::Real(cd) => cd.values().map(|v| v.to_string()).collect(),
×
UNCOV
1120
            Row::Text(cd) => cd.values().map(|v| v.to_string()).collect(),
×
1121
            Row::Bool(cd) => cd.values().map(|v| v.to_string()).collect(),
×
UNCOV
1122
            Row::Vector(cd) => cd.values().map(format_vector_for_display).collect(),
×
1123
            Row::None => panic!("Found None in columns"),
×
1124
        }
1125
    }
1126

UNCOV
1127
    fn count(&self) -> usize {
×
UNCOV
1128
        match self {
×
UNCOV
1129
            Row::Integer(cd) => cd.len(),
×
1130
            Row::Real(cd) => cd.len(),
×
UNCOV
1131
            Row::Text(cd) => cd.len(),
×
1132
            Row::Bool(cd) => cd.len(),
×
1133
            Row::Vector(cd) => cd.len(),
×
1134
            Row::None => panic!("Found None in columns"),
×
1135
        }
1136
    }
1137

1138
    /// Every column's BTreeMap is keyed by ROWID. All columns share the same keyset
1139
    /// after an INSERT (missing columns are padded), so any column's keys are a valid
1140
    /// iteration of the table's rowids.
1141
    pub fn rowids(&self) -> Vec<i64> {
2✔
1142
        match self {
2✔
1143
            Row::Integer(m) => m.keys().copied().collect(),
2✔
1144
            Row::Text(m) => m.keys().copied().collect(),
2✔
1145
            Row::Real(m) => m.keys().copied().collect(),
×
1146
            Row::Bool(m) => m.keys().copied().collect(),
×
1147
            Row::Vector(m) => m.keys().copied().collect(),
×
1148
            Row::None => vec![],
×
1149
        }
1150
    }
1151

1152
    pub fn get(&self, rowid: i64) -> Option<Value> {
2✔
1153
        match self {
2✔
1154
            Row::Integer(m) => m.get(&rowid).map(|v| Value::Integer(i64::from(*v))),
6✔
1155
            // INSERT stores the literal string "Null" in Text columns that were omitted
1156
            // from the query — re-map that back to a real NULL on read.
1157
            Row::Text(m) => m.get(&rowid).map(|v| {
4✔
1158
                if v == "Null" {
4✔
1159
                    Value::Null
1✔
1160
                } else {
1161
                    Value::Text(v.clone())
2✔
1162
                }
1163
            }),
1164
            Row::Real(m) => m.get(&rowid).map(|v| Value::Real(f64::from(*v))),
3✔
1165
            Row::Bool(m) => m.get(&rowid).map(|v| Value::Bool(*v)),
×
1166
            Row::Vector(m) => m.get(&rowid).map(|v| Value::Vector(v.clone())),
3✔
1167
            Row::None => None,
×
1168
        }
1169
    }
1170
}
1171

1172
/// Render a vector for human display. Used by both `Row::get_serialized_col_data`
/// (for the REPL's print-table path) and `Value::to_display_string`.
///
/// Format: `[0.1, 0.2, 0.3]` — JSON-like, elements separated by `", "`. Each
/// element goes through the default `f32` `Display`, which picks the
/// minimal-roundtrip representation (0.1f32 prints as "0.1", not
/// "0.10000000149011612"). An empty vector renders as `[]`.
///
/// For high-dimensional vectors (e.g. 384 elements) this produces a long
/// line; truncation ellipsis is a future polish (see Phase 7 plan, "What
/// this proposal does NOT commit to").
fn format_vector_for_display(v: &Vec<f32>) -> String {
    let rendered: Vec<String> = v.iter().map(f32::to_string).collect();
    format!("[{}]", rendered.join(", "))
}
1193

1194
/// Runtime value produced by query execution. Separate from the on-disk `Row` enum
1195
/// so the executor can carry typed values (including NULL) across operators.
1196
#[derive(Debug, Clone, PartialEq)]
1197
pub enum Value {
1198
    /// Integer value; `Row::Integer` storage (`i32`) is widened to `i64` on read.
    Integer(i64),
1199
    /// Text value. JSON column values are also carried as text.
    Text(String),
1200
    /// Real value; `Row::Real` storage (`f32`) is widened to `f64` on read.
    Real(f64),
1201
    /// Boolean value.
    Bool(bool),
1202
    /// Phase 7a: dense f32 vector as a runtime value. Carries its own
1203
    /// dimension implicitly via `Vec::len`; the column it's being
1204
    /// assigned to has a declared `DataType::Vector(N)` that's checked
1205
    /// at INSERT/UPDATE time.
1206
    Vector(Vec<f32>),
1207
    /// SQL NULL; displayed as `NULL` by `to_display_string`.
    Null,
1208
}
1209

1210
impl Value {
1211
    pub fn to_display_string(&self) -> String {
1✔
1212
        match self {
1✔
1213
            Value::Integer(v) => v.to_string(),
1✔
1214
            Value::Text(s) => s.clone(),
1✔
1215
            Value::Real(f) => f.to_string(),
×
1216
            Value::Bool(b) => b.to_string(),
×
1217
            Value::Vector(v) => format_vector_for_display(v),
1✔
1218
            Value::Null => String::from("NULL"),
1219
        }
1220
    }
1221
}
1222

1223
/// Parse a bracket-array literal like `"[0.1, 0.2, 0.3]"` (or `"[1, 2, 3]"`)
1224
/// into a `Vec<f32>`. The parser/insert pipeline stores vector literals as
1225
/// strings in `InsertQuery::rows` (a `Vec<Vec<String>>`); this helper is
1226
/// the inverse — turn the string back into a typed vector at the boundary
1227
/// where we actually need element-typed data.
1228
///
1229
/// Accepts:
1230
/// - `[]` → empty vector (caller's dimension check rejects it for VECTOR(N≥1))
1231
/// - `[0.1, 0.2, 0.3]` → standard float syntax
1232
/// - `[1, 2, 3]` → integers, coerced to f32 (matches `VALUES (1, 2)` for
1233
///   `REAL` columns; we widen ints to floats automatically)
1234
/// - whitespace tolerated everywhere (Python/JSON/pgvector convention)
1235
///
1236
/// Rejects with a descriptive message:
1237
/// - missing `[` / `]`
1238
/// - non-numeric elements (`['foo', 0.1]`)
1239
/// - NaN / Inf literals (we accept them via `f32::from_str` but caller can
1240
///   reject if undesired — for now we let them through; HNSW etc. will
1241
///   reject NaN at index time)
1242
pub fn parse_vector_literal(s: &str) -> Result<Vec<f32>> {
1✔
1243
    let trimmed = s.trim();
1✔
1244
    if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
2✔
1245
        return Err(SQLRiteError::General(format!(
1✔
1246
            "expected bracket-array literal `[...]`, got `{s}`"
1247
        )));
1248
    }
1249
    let inner = &trimmed[1..trimmed.len() - 1].trim();
2✔
1250
    if inner.is_empty() {
1✔
1251
        return Ok(Vec::new());
1✔
1252
    }
1253
    let mut out = Vec::new();
1✔
1254
    for (i, part) in inner.split(',').enumerate() {
2✔
1255
        let element = part.trim();
2✔
1256
        let parsed: f32 = element.parse().map_err(|_| {
3✔
1257
            SQLRiteError::General(format!("vector element {i} (`{element}`) is not a number"))
1✔
1258
        })?;
1259
        out.push(parsed);
1✔
1260
    }
1261
    Ok(out)
1✔
1262
}
1263

1264
#[cfg(test)]
mod tests {
    use super::*;
    use sqlparser::dialect::SQLiteDialect;
    use sqlparser::parser::Parser;

    /// Shared fixture: parse `sql` with the SQLite dialect, require that it
    /// holds exactly one statement, and build a `Table` from the resulting
    /// `CreateQuery`. Panics (failing the calling test) on any error.
    fn table_from_create_sql(sql: &str) -> Table {
        let dialect = SQLiteDialect {};
        let mut ast = Parser::parse_sql(&dialect, sql).unwrap();
        // Checking for exactly one statement also covers the empty case,
        // which would otherwise surface as a bare `unwrap` panic on `pop`.
        assert_eq!(
            ast.len(),
            1,
            "Expected a single query statement, but found {}.",
            ast.len()
        );
        let query = ast.pop().unwrap();
        let create_query = CreateQuery::new(&query).unwrap();
        Table::new(create_query)
    }

    #[test]
    fn datatype_display_trait_test() {
        assert_eq!(DataType::Integer.to_string(), "Integer");
        assert_eq!(DataType::Text.to_string(), "Text");
        assert_eq!(DataType::Real.to_string(), "Real");
        assert_eq!(DataType::Bool.to_string(), "Boolean");
        assert_eq!(DataType::Vector(384).to_string(), "Vector(384)");
        assert_eq!(DataType::None.to_string(), "None");
        assert_eq!(DataType::Invalid.to_string(), "Invalid");
    }

    // -----------------------------------------------------------------
    // Phase 7a — VECTOR(N) column type
    // -----------------------------------------------------------------

    #[test]
    fn datatype_new_parses_vector_dim() {
        // Standard cases.
        assert_eq!(DataType::new("vector(1)".to_string()), DataType::Vector(1));
        assert_eq!(
            DataType::new("vector(384)".to_string()),
            DataType::Vector(384)
        );
        assert_eq!(
            DataType::new("vector(1536)".to_string()),
            DataType::Vector(1536)
        );

        // Case-insensitive on the keyword.
        assert_eq!(
            DataType::new("VECTOR(384)".to_string()),
            DataType::Vector(384)
        );

        // Whitespace inside parens tolerated (the create-parser strips it
        // but the string-based round-trip in DataType::new is the one place
        // we don't fully control input formatting).
        assert_eq!(
            DataType::new("vector( 64 )".to_string()),
            DataType::Vector(64)
        );
    }

    #[test]
    fn datatype_new_rejects_bad_vector_strings() {
        // dim = 0 is rejected (Q2: VECTOR(N≥1)).
        assert_eq!(DataType::new("vector(0)".to_string()), DataType::Invalid);
        // Non-numeric dim.
        assert_eq!(DataType::new("vector(abc)".to_string()), DataType::Invalid);
        // Empty parens.
        assert_eq!(DataType::new("vector()".to_string()), DataType::Invalid);
        // Negative dim wouldn't even parse as usize, so falls into Invalid.
        assert_eq!(DataType::new("vector(-3)".to_string()), DataType::Invalid);
    }

    #[test]
    fn datatype_to_wire_string_round_trips_vector() {
        let dt = DataType::Vector(384);
        let wire = dt.to_wire_string();
        assert_eq!(wire, "vector(384)");
        // And feeds back through DataType::new losslessly — this is the
        // round-trip the ParsedColumn pipeline relies on.
        assert_eq!(DataType::new(wire), DataType::Vector(384));
    }

    #[test]
    fn parse_vector_literal_accepts_floats() {
        let v = parse_vector_literal("[0.1, 0.2, 0.3]").expect("parse");
        assert_eq!(v, vec![0.1f32, 0.2, 0.3]);
    }

    #[test]
    fn parse_vector_literal_accepts_ints_widening_to_f32() {
        let v = parse_vector_literal("[1, 2, 3]").expect("parse");
        assert_eq!(v, vec![1.0f32, 2.0, 3.0]);
    }

    #[test]
    fn parse_vector_literal_handles_negatives_and_whitespace() {
        let v = parse_vector_literal("[ -1.5 ,  2.0,  -3.5 ]").expect("parse");
        assert_eq!(v, vec![-1.5f32, 2.0, -3.5]);
    }

    #[test]
    fn parse_vector_literal_empty_brackets_is_empty_vec() {
        let v = parse_vector_literal("[]").expect("parse");
        assert!(v.is_empty());
    }

    #[test]
    fn parse_vector_literal_rejects_non_bracketed() {
        assert!(parse_vector_literal("0.1, 0.2").is_err());
        assert!(parse_vector_literal("(0.1, 0.2)").is_err());
        assert!(parse_vector_literal("[0.1, 0.2").is_err()); // missing ]
        assert!(parse_vector_literal("0.1, 0.2]").is_err()); // missing [
    }

    #[test]
    fn parse_vector_literal_rejects_non_numeric_elements() {
        let err = parse_vector_literal("[1.0, 'foo', 3.0]").unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("vector element 1") && msg.contains("'foo'"),
            "error message should pinpoint the bad element: got `{msg}`"
        );
    }

    #[test]
    fn value_vector_display_format() {
        let v = Value::Vector(vec![0.1, 0.2, 0.3]);
        assert_eq!(v.to_display_string(), "[0.1, 0.2, 0.3]");

        // Empty vector displays as `[]`.
        let empty = Value::Vector(vec![]);
        assert_eq!(empty.to_display_string(), "[]");
    }

    #[test]
    fn create_new_table_test() {
        let table = table_from_create_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl,
            email TEXT NOT NULL UNIQUE,
            active BOOL,
            score REAL
        );",
        );

        assert_eq!(table.columns.len(), 6);
        assert_eq!(table.last_rowid, 0);

        // Look the primary-key column up by name; `find` stops at the first
        // match instead of collecting every match into a temporary Vec.
        let id_column = "id".to_string();
        let column = table
            .columns
            .iter()
            .find(|c| c.column_name == id_column)
            .expect("column not found");
        assert!(column.is_pk);
        assert_eq!(column.datatype, DataType::Integer);
    }

    #[test]
    fn print_table_schema_test() {
        let table = table_from_create_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl
        );",
        );

        let lines_printed = table.print_table_schema();
        assert_eq!(lines_printed, Ok(9));
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc