• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

joaoh82 / rust_sqlite / 27325105780

11 Jun 2026 05:05AM UTC coverage: 69.525% (+0.02%) from 69.507%
27325105780

Pull #166

github

web-flow
Merge 106821ed2 into 961cefac0
Pull Request #166: fix(sql): error on unknown columns in single-table scope (SQLR-2)

28 of 36 new or added lines in 1 file covered. (77.78%)

2 existing lines in 2 files now uncovered.

11790 of 16958 relevant lines covered (69.52%)

1.25 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.39
/src/sql/executor.rs
1
//! Query executors — evaluate parsed SQL statements against the in-memory
2
//! storage and produce formatted output.
3

4
use std::cmp::Ordering;
5

6
use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
7
use sqlparser::ast::{
8
    AlterTable, AlterTableOperation, AssignmentTarget, BinaryOperator, CreateIndex, Delete, Expr,
9
    FromTable, FunctionArg, FunctionArgExpr, FunctionArguments, Ident, IndexType, ObjectName,
10
    ObjectNamePart, RenameTableNameKind, Statement, TableFactor, TableWithJoins, UnaryOperator,
11
    Update, Value as AstValue,
12
};
13

14
use crate::error::{Result, SQLRiteError};
15
use crate::sql::agg::{AggState, DistinctKey, like_match};
16
use crate::sql::db::database::Database;
17
use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
18
use crate::sql::db::table::{
19
    DataType, FtsIndexEntry, HnswIndexEntry, Table, Value, parse_vector_literal,
20
};
21
use crate::sql::fts::{Bm25Params, PostingList};
22
use crate::sql::hnsw::{DistanceMetric, HnswIndex};
23
use crate::sql::parser::select::{
24
    AggregateArg, AggregateFn, GroupByKey, JoinConstraintKind, JoinType, OrderByClause, Projection,
25
    ProjectionItem, ProjectionKind, SelectQuery, parse_aggregate_call,
26
};
27

28
// -----------------------------------------------------------------
29
// SQLR-5 — Row-scope abstraction
30
// -----------------------------------------------------------------
31
//
32
// Single-table SELECT / UPDATE / DELETE evaluate WHERE / ORDER BY /
33
// projection expressions over `(&Table, rowid)`. JOIN evaluation
34
// needs the same expression evaluator to look up columns across
35
// multiple tables, with NULL padding for unmatched outer-join rows.
36
//
37
// Rather than fork the evaluator, we abstract "what's in scope when
38
// I see a column reference" behind a trait. Every callsite that
39
// previously took `(table, rowid)` now takes `&dyn RowScope`. The
40
// single-table case constructs a tiny `SingleTableScope`; the join
41
// case constructs a `JoinedScope` that knows about every table in
42
// scope plus the per-table rowid (or `None` for a NULL-padded row).
43
//
44
// The trait stays small on purpose:
45
//
46
//   - `lookup` resolves a column reference (`col` or `t.col`) to a
47
//     `Value`. Unknown columns error in both scopes (SQLR-2).
48
//     NULL-padded joined rows yield `Value::Null` for any column
49
//     from their side. Ambiguous unqualified references in joined
50
//     scope error.
51
//
52
//   - `single_table_view` lets index-probing helpers (FTS, HNSW,
53
//     vec_distance) bail out cleanly when invoked over a join — they
54
//     need a `(Table, rowid)` pair to look up an index, and the
55
//     joined case can't answer without per-call disambiguation we
56
//     haven't plumbed yet. Returns `None` in joined scope.
57
pub(crate) trait RowScope {
58
    fn lookup(&self, qualifier: Option<&str>, col: &str) -> Result<Value>;
59

60
    /// `Some((table, rowid))` for a single-table scope; `None` for a
61
    /// joined scope. v1 join support delegates "needs single-table"
62
    /// helpers (FTS / HNSW / vec_distance with column args) to the
63
    /// single-table path; calling them from a joined query produces
64
    /// a `NotImplemented` error rather than wrong results.
65
    fn single_table_view(&self) -> Option<(&Table, i64)>;
66
}
67

68
/// The default scope for non-join queries: one table, one rowid.
69
pub(crate) struct SingleTableScope<'a> {
70
    table: &'a Table,
71
    rowid: i64,
72
}
73

74
impl<'a> SingleTableScope<'a> {
75
    pub(crate) fn new(table: &'a Table, rowid: i64) -> Self {
1✔
76
        Self { table, rowid }
77
    }
78
}
79

80
impl RowScope for SingleTableScope<'_> {
81
    fn lookup(&self, qualifier: Option<&str>, col: &str) -> Result<Value> {
1✔
82
        // The qualifier (if any) is ignored — we only have one table
83
        // in scope, so `t.col` resolves the same as `col`. Validating
84
        // it against the table name/alias requires plumbing the FROM
85
        // alias through every `eval_expr` callsite (SQLR-2 follow-up).
UNCOV
86
        let _ = qualifier;
×
87
        // SQLR-2 — unknown columns error, matching `JoinedScope`. A
88
        // schema column whose cell was never written (omitted from the
89
        // INSERT column list) still reads as NULL.
90
        if !self.table.contains_column(col.to_string()) {
1✔
91
            return Err(SQLRiteError::Internal(format!(
1✔
NEW
92
                "Column '{col}' does not exist on table '{}'",
×
NEW
93
                self.table.tb_name
×
94
            )));
95
        }
96
        Ok(self.table.get_value(col, self.rowid).unwrap_or(Value::Null))
1✔
97
    }
98

99
    fn single_table_view(&self) -> Option<(&Table, i64)> {
1✔
100
        Some((self.table, self.rowid))
1✔
101
    }
102
}
103

104
/// One table participating in a joined query, plus the user-visible
105
/// name to match against `t.col` qualifiers (alias if present, else
106
/// the bare table name).
107
pub(crate) struct JoinedTableRef<'a> {
108
    pub table: &'a Table,
109
    pub scope_name: String,
110
}
111

112
/// Multi-table scope used during join execution. `rowids[i]` is the
113
/// rowid in `tables[i]`, or `None` for a NULL-padded row coming out
114
/// of an outer join.
115
pub(crate) struct JoinedScope<'a> {
116
    pub tables: &'a [JoinedTableRef<'a>],
117
    pub rowids: &'a [Option<i64>],
118
}
119

120
impl RowScope for JoinedScope<'_> {
121
    fn lookup(&self, qualifier: Option<&str>, col: &str) -> Result<Value> {
1✔
122
        if let Some(q) = qualifier {
1✔
123
            // Qualified reference: pick the matching table; if it's
124
            // NULL-padded, the column is NULL; else fetch from row.
125
            let pos = self
3✔
126
                .tables
×
127
                .iter()
1✔
128
                .position(|t| t.scope_name.eq_ignore_ascii_case(q))
3✔
129
                .ok_or_else(|| {
2✔
130
                    SQLRiteError::Internal(format!(
1✔
131
                        "unknown table qualifier '{q}' in column reference '{q}.{col}'"
×
132
                    ))
133
                })?;
134
            if !self.tables[pos].table.contains_column(col.to_string()) {
1✔
135
                return Err(SQLRiteError::Internal(format!(
×
136
                    "column '{col}' does not exist on '{}'",
×
137
                    self.tables[pos].scope_name
×
138
                )));
139
            }
140
            return Ok(match self.rowids[pos] {
3✔
141
                None => Value::Null,
1✔
142
                Some(r) => self.tables[pos]
2✔
143
                    .table
×
144
                    .get_value(col, r)
1✔
145
                    .unwrap_or(Value::Null),
1✔
146
            });
147
        }
148
        // Unqualified: search every in-scope table. Exactly-one match
149
        // wins; zero matches → unknown column; multi matches →
150
        // ambiguous, prompt the user to qualify.
151
        let mut hit: Option<usize> = None;
1✔
152
        for (i, t) in self.tables.iter().enumerate() {
2✔
153
            if t.table.contains_column(col.to_string()) {
2✔
154
                if hit.is_some() {
1✔
155
                    return Err(SQLRiteError::Internal(format!(
1✔
156
                        "column reference '{col}' is ambiguous — qualify it as <table>.{col}"
×
157
                    )));
158
                }
159
                hit = Some(i);
1✔
160
            }
161
        }
162
        let i = hit.ok_or_else(|| {
1✔
163
            SQLRiteError::Internal(format!(
×
164
                "unknown column '{col}' in joined SELECT (no in-scope table has it)"
×
165
            ))
166
        })?;
167
        Ok(match self.rowids[i] {
2✔
168
            None => Value::Null,
×
169
            Some(r) => self.tables[i]
2✔
170
                .table
×
171
                .get_value(col, r)
1✔
172
                .unwrap_or(Value::Null),
1✔
173
        })
174
    }
175

176
    fn single_table_view(&self) -> Option<(&Table, i64)> {
×
177
        None
×
178
    }
179
}
180

181
/// Executes a parsed `SelectQuery` against the database and returns a
182
/// human-readable rendering of the result set (prettytable). Also returns
183
/// the number of rows produced, for the top-level status message.
184
/// Structured result of a SELECT: column names in projection order,
185
/// and each matching row as a `Vec<Value>` aligned with the columns.
186
/// Phase 5a introduced this so the public `Connection` / `Statement`
187
/// API has typed rows to yield; the existing `execute_select` that
188
/// returns pre-rendered text is now a thin wrapper on top.
189
pub struct SelectResult {
190
    pub columns: Vec<String>,
191
    pub rows: Vec<Vec<Value>>,
192
}
193

194
/// Executes a SELECT and returns structured rows. The typed rows are
195
/// what the new public API streams to callers; the REPL / Tauri app
196
/// pre-render into a prettytable via `execute_select`.
197
pub fn execute_select_rows(query: SelectQuery, db: &Database) -> Result<SelectResult> {
1✔
198
    // SQLR-5 — joined SELECTs go through a dedicated executor that
199
    // knows how to thread a multi-table scope through expression
200
    // evaluation. The single-table fast path below stays untouched
201
    // (and so do its HNSW / FTS / bounded-heap optimizations).
202
    if !query.joins.is_empty() {
2✔
203
        return execute_select_rows_joined(query, db);
2✔
204
    }
205

206
    // SQLR-10 — `SELECT … FROM sqlrite_master` introspects the catalog.
207
    // The catalog isn't a live entry in `db.tables` (it's materialized at
208
    // save time), so we synthesize a read-only in-memory snapshot on
209
    // demand and run the normal single-table path against it. WHERE /
210
    // projections / ORDER BY / LIMIT all work unchanged. Writes against
211
    // sqlrite_master remain rejected (it never lands in `db.tables`), and
212
    // joins against it are not supported (the joined path doesn't
213
    // synthesize it).
214
    let master_snapshot;
215
    let table: &Table = if query.table_name == crate::sql::pager::MASTER_TABLE_NAME {
3✔
216
        master_snapshot = crate::sql::pager::build_master_table_snapshot(db)?;
2✔
217
        &master_snapshot
1✔
218
    } else {
219
        db.get_table(query.table_name.clone()).map_err(|_| {
5✔
220
            SQLRiteError::Internal(format!("Table '{}' not found", query.table_name))
2✔
221
        })?
222
    };
223

224
    // SQLR-3: Materialize the projection as `Vec<ProjectionItem>` so
225
    // both the simple-row path and the aggregation path can iterate the
226
    // same shape. `Projection::All` expands to bare-column items in
227
    // declaration order; that path then runs the existing rowid pipeline.
228
    let proj_items: Vec<ProjectionItem> = match &query.projection {
1✔
229
        Projection::All => table
2✔
230
            .column_names()
231
            .into_iter()
232
            .map(|c| ProjectionItem {
3✔
233
                kind: ProjectionKind::Column {
1✔
234
                    qualifier: None,
1✔
235
                    name: c,
236
                },
237
                alias: None,
1✔
238
            })
239
            .collect(),
240
        Projection::Items(items) => items.clone(),
2✔
241
    };
242
    let has_aggregates = proj_items
3✔
243
        .iter()
244
        .any(|i| matches!(i.kind, ProjectionKind::Aggregate(_)));
3✔
245
    // Validate bare-column references against the table schema.
246
    for item in &proj_items {
1✔
247
        if let ProjectionKind::Column { name: c, .. } = &item.kind
2✔
248
            && !table.contains_column(c.clone())
1✔
249
        {
250
            return Err(SQLRiteError::Internal(format!(
1✔
251
                "Column '{c}' does not exist on table '{}'",
252
                query.table_name
253
            )));
254
        }
255
    }
256
    for g in &query.group_by {
1✔
257
        if !table.contains_column(g.name.clone()) {
2✔
258
            return Err(SQLRiteError::Internal(format!(
×
259
                "GROUP BY references unknown column '{}' on table '{}'",
260
                g.name, query.table_name
261
            )));
262
        }
263
    }
264
    // Collect matching rowids. If the WHERE is the shape `col = literal`
265
    // and `col` has a secondary index, probe the index for an O(log N)
266
    // seek; otherwise fall back to the full table scan.
267
    let matching = match select_rowids(table, query.selection.as_ref())? {
1✔
268
        RowidSource::IndexProbe(rowids) => rowids,
1✔
269
        RowidSource::FullScan => {
270
            let mut out = Vec::new();
1✔
271
            for rowid in table.rowids() {
3✔
272
                if let Some(expr) = &query.selection
2✔
273
                    && !eval_predicate(expr, table, rowid)?
2✔
274
                {
275
                    continue;
276
                }
277
                out.push(rowid);
2✔
278
            }
279
            out
1✔
280
        }
281
    };
282
    let mut matching = matching;
1✔
283

284
    let aggregating = has_aggregates || !query.group_by.is_empty();
3✔
285

286
    // SQLR-3: aggregation path. When the SELECT contains aggregates or a
287
    // GROUP BY, the rowid-shaped optimizations (HNSW / FTS / bounded
288
    // heap) don't compose with grouping — every row contributes to its
289
    // group, so we walk the full filtered rowid set, accumulate, then
290
    // sort/truncate the resulting *output rows*.
291
    if aggregating {
1✔
292
        let (all_items, having_expr) = lower_having_into_hidden_slots(&query, &proj_items)?;
2✔
293

294
        // Validate aggregate column args (visible + HAVING-hidden).
295
        for item in &all_items {
2✔
296
            if let ProjectionKind::Aggregate(call) = &item.kind
2✔
297
                && let AggregateArg::Column { name: c, .. } = &call.arg
1✔
298
                && !table.contains_column(c.clone())
1✔
299
            {
300
                return Err(SQLRiteError::Internal(format!(
×
301
                    "{}({}) references unknown column '{c}' on table '{}'",
302
                    call.func.as_str(),
×
303
                    c,
304
                    query.table_name
305
                )));
306
            }
307
        }
308

309
        let scopes = matching.iter().map(|&r| SingleTableScope::new(table, r));
3✔
310
        return run_aggregation_pipeline(scopes, &query, &proj_items, &all_items, &having_expr);
1✔
311
    }
312

313
    // Non-aggregating path — same flow as before, with the extra
314
    // affordances that (a) the projection list now goes through
315
    // `ProjectionItem` and (b) DISTINCT applies after row materialization.
316

317
    // Phase 7c — bounded-heap top-k optimization.
318
    //
319
    // The naive "ORDER BY <expr>" path (Phase 7b) sorts every matching
320
    // rowid: O(N log N) sort_by + a truncate. For KNN queries
321
    //
322
    //     SELECT id FROM docs
323
    //     ORDER BY vec_distance_l2(embedding, [...])
324
    //     LIMIT 10;
325
    //
326
    // N is the table row count and k is the LIMIT. With a bounded
327
    // max-heap of size k we can find the top-k in O(N log k) — same
328
    // sort_by-per-row cost on the heap operations, but k is typically
329
    // 10-100 while N can be millions.
330
    //
331
    // Phase 7d.2 — HNSW ANN probe.
332
    //
333
    // Even better than the bounded heap: if the ORDER BY expression is
334
    // exactly `vec_distance_l2(<col>, <bracket-array literal>)` AND
335
    // `<col>` has an HNSW index attached, skip the linear scan
336
    // entirely and probe the graph in O(log N). Approximate but
337
    // typically ≥ 0.95 recall (verified by the recall tests in
338
    // src/sql/hnsw.rs).
339
    //
340
    // We branch in cases:
341
    //   1. ORDER BY + LIMIT k matches the HNSW probe pattern  → graph probe.
342
    //   2. ORDER BY + LIMIT k matches the FTS probe pattern   → posting probe.
343
    //   3. ORDER BY + LIMIT k where k < |matching|            → bounded heap (7c).
344
    //   4. ORDER BY without LIMIT, or LIMIT >= |matching|     → full sort.
345
    //   5. LIMIT without ORDER BY                              → just truncate.
346
    //
347
    // DISTINCT is applied post-projection (we'd over-truncate if LIMIT
348
    // ran before DISTINCT had a chance to collapse duplicates), so when
349
    // DISTINCT is on we defer truncation past the dedupe step.
350
    let defer_limit_for_distinct = query.distinct;
1✔
351
    match (&query.order_by, query.limit) {
2✔
352
        (Some(order), Some(k)) if try_hnsw_probe(table, &order.expr, k).is_some() => {
3✔
353
            matching = try_hnsw_probe(table, &order.expr, k).unwrap();
1✔
354
        }
355
        (Some(order), Some(k))
2✔
356
            if try_fts_probe(table, &order.expr, order.ascending, k).is_some() =>
1✔
357
        {
358
            matching = try_fts_probe(table, &order.expr, order.ascending, k).unwrap();
1✔
359
        }
360
        (Some(order), Some(k)) if !defer_limit_for_distinct && k < matching.len() => {
1✔
361
            matching = select_topk(&matching, table, order, k)?;
2✔
362
        }
363
        (Some(order), _) => {
1✔
364
            sort_rowids(&mut matching, table, order)?;
3✔
365
            if let Some(k) = query.limit
1✔
366
                && !defer_limit_for_distinct
1✔
367
            {
368
                matching.truncate(k);
1✔
369
            }
370
        }
371
        (None, Some(k)) if !defer_limit_for_distinct => {
×
372
            matching.truncate(k);
×
373
        }
374
        _ => {}
375
    }
376

377
    let columns: Vec<String> = proj_items.iter().map(|i| i.output_name()).collect();
4✔
378
    let projected_cols: Vec<String> = proj_items
1✔
379
        .iter()
380
        .map(|i| match &i.kind {
3✔
381
            ProjectionKind::Column { name, .. } => name.clone(),
1✔
382
            ProjectionKind::Aggregate(_) => unreachable!("aggregation handled above"),
×
383
        })
384
        .collect();
385

386
    // Build typed rows. Missing cells surface as `Value::Null` — that
387
    // maps a column-not-present-for-this-rowid case onto the public
388
    // `Row::get` → `Option<T>` surface cleanly.
389
    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(matching.len());
2✔
390
    for rowid in &matching {
2✔
391
        let row: Vec<Value> = projected_cols
1✔
392
            .iter()
393
            .map(|col| table.get_value(col, *rowid).unwrap_or(Value::Null))
3✔
394
            .collect();
395
        rows.push(row);
1✔
396
    }
397

398
    if query.distinct {
1✔
399
        rows = dedupe_rows(rows);
1✔
400
        if let Some(k) = query.limit {
1✔
401
            rows.truncate(k);
×
402
        }
403
    }
404

405
    Ok(SelectResult { columns, rows })
1✔
406
}
407

408
/// A join constraint resolved against the live table schemas: the
409
/// concrete `ON` predicate to evaluate, plus the columns that
410
/// `SELECT *` should show once (empty for a plain `ON` join, non-empty
411
/// for `USING` / `NATURAL`).
412
struct ResolvedJoin {
413
    on: Expr,
414
    using_columns: Vec<String>,
415
}
416

417
/// Turn a [`JoinConstraintKind`] into the `ON` predicate the nested-loop
418
/// driver evaluates. `tables[..right_pos]` are the tables in scope on
419
/// the left of this join; `tables[right_pos]` is the table being joined.
420
///
421
/// - `On` passes its predicate through unchanged.
422
/// - `Using(cols)` becomes `left.col = right.col` AND-chained over every
423
///   named column. The left qualifier is the first in-scope table that
424
///   actually has the column, so the rewrite is correct for join chains
425
///   (`A JOIN B USING(x) JOIN C USING(x)` resolves both `x`es against
426
///   `A`). A column missing from either side is an error.
427
/// - `Natural` discovers the shared column names first (right table's
428
///   columns that also appear somewhere on the left), then proceeds
429
///   exactly like `Using`. No shared columns ⇒ an always-true predicate,
430
///   i.e. a cross product, matching SQLite.
431
fn resolve_join_constraint(
1✔
432
    constraint: &JoinConstraintKind,
433
    tables: &[JoinedTableRef<'_>],
434
    right_pos: usize,
435
) -> Result<ResolvedJoin> {
436
    match constraint {
1✔
437
        JoinConstraintKind::On(expr) => Ok(ResolvedJoin {
2✔
438
            on: (**expr).clone(),
2✔
439
            using_columns: Vec::new(),
1✔
440
        }),
441
        JoinConstraintKind::Using(cols) => build_using_join(cols, tables, right_pos),
1✔
442
        JoinConstraintKind::Natural => {
443
            // Shared columns = the right table's columns that also exist
444
            // on some left table, preserving the right table's column
445
            // order for determinism.
446
            let shared: Vec<String> = tables[right_pos]
2✔
447
                .table
448
                .column_names()
449
                .into_iter()
450
                .filter(|c| {
2✔
451
                    tables[..right_pos]
1✔
452
                        .iter()
1✔
453
                        .any(|t| t.table.contains_column(c.clone()))
3✔
454
                })
455
                .collect();
456
            build_using_join(&shared, tables, right_pos)
2✔
457
        }
458
    }
459
}
460

461
/// Shared lowering for `USING` and `NATURAL`: synthesize the AND-chain
462
/// of `left.col = right.col` equalities and report the deduplicated
463
/// columns. An empty `cols` (a `NATURAL` join with nothing in common)
464
/// yields an always-true predicate and no dedup, i.e. a cross product.
465
fn build_using_join(
1✔
466
    cols: &[String],
467
    tables: &[JoinedTableRef<'_>],
468
    right_pos: usize,
469
) -> Result<ResolvedJoin> {
470
    let right = &tables[right_pos];
1✔
471
    let mut predicate: Option<Expr> = None;
1✔
472
    for col in cols {
3✔
473
        // The named column must exist on the right side …
474
        if !right.table.contains_column(col.clone()) {
2✔
475
            return Err(SQLRiteError::Internal(format!(
2✔
476
                "cannot join USING column '{col}' — it is not present on table '{}'",
477
                right.scope_name
478
            )));
479
        }
480
        // … and on at least one left-side table. Qualify the left
481
        // reference with whichever table actually has it.
482
        let left = tables[..right_pos]
3✔
483
            .iter()
1✔
484
            .find(|t| t.table.contains_column(col.clone()))
3✔
485
            .ok_or_else(|| {
1✔
486
                SQLRiteError::Internal(format!(
×
487
                    "cannot join USING column '{col}' — it is not present on any left-side table"
488
                ))
489
            })?;
490
        let eq = col_eq(&left.scope_name, &right.scope_name, col);
1✔
491
        predicate = Some(match predicate {
2✔
492
            None => eq,
1✔
493
            Some(prev) => Expr::BinaryOp {
2✔
494
                left: Box::new(prev),
2✔
495
                op: BinaryOperator::And,
1✔
496
                right: Box::new(eq),
1✔
497
            },
498
        });
499
    }
500
    Ok(ResolvedJoin {
1✔
501
        on: predicate
1✔
502
            .unwrap_or_else(|| Expr::Value(sqlparser::ast::Value::Boolean(true).with_empty_span())),
3✔
503
        using_columns: cols.to_vec(),
1✔
504
    })
505
}
506

507
/// Build the `left_scope.col = right_scope.col` equality used to lower
508
/// `USING` / `NATURAL` joins onto the existing `ON` evaluation path.
509
fn col_eq(left_scope: &str, right_scope: &str, col: &str) -> Expr {
2✔
510
    let col_ref = |scope: &str| {
2✔
511
        Expr::CompoundIdentifier(vec![
2✔
512
            Ident::new(scope.to_string()),
2✔
513
            Ident::new(col.to_string()),
2✔
514
        ])
515
    };
516
    Expr::BinaryOp {
517
        left: Box::new(col_ref(left_scope)),
1✔
518
        op: BinaryOperator::Eq,
519
        right: Box::new(col_ref(right_scope)),
2✔
520
    }
521
}
522

523
// -----------------------------------------------------------------
524
// SQLR-5 — Joined SELECT execution
525
// -----------------------------------------------------------------
526
//
527
// The strategy is a left-folded nested-loop join: start with the
528
// rowids of the leading FROM table, then for each JOIN clause
529
// combine the accumulator (`Vec<Vec<Option<i64>>>`) with the rowids
530
// of the next table. Each join flavor differs only in how it
531
// handles unmatched left / right rows:
532
//
533
//   INNER       — drop unmatched on both sides
534
//   LEFT OUTER  — keep every left row; pad right side with NULL
535
//   RIGHT OUTER — keep every right row; pad left side with NULL
536
//   FULL OUTER  — keep both unmatched sets, NULL-padding the other
537
//
538
// This isn't a hash join — every join is O(N×M) in the size of the
539
// accumulator and the right table. Adequate for SQLRite's "embedded
540
// learning database" niche; a future phase could layer hash / merge
541
// joins on equi-join shapes without changing the surface API.
542
//
543
// SQLR-6 — aggregates / GROUP BY / DISTINCT compose with joins: the
544
// fully-joined row stream feeds the same scope-generic aggregation
545
// pipeline the single-table path uses (Stage 3.5 below), and DISTINCT
546
// dedupes the projected output rows.
547
fn execute_select_rows_joined(query: SelectQuery, db: &Database) -> Result<SelectResult> {
2✔
548
    // Resolve every participating table once and capture its scope
549
    // name (alias if supplied, else table name). Scope names are
550
    // case-sensitive in matching the original identifier text;
551
    // qualifier matches in `JoinedScope::lookup` use
552
    // `eq_ignore_ascii_case` so `T1.c1` works whether the user
553
    // wrote `T1`, `t1`, or `T1` differently than the alias.
554
    let mut joined_tables: Vec<JoinedTableRef<'_>> = Vec::with_capacity(1 + query.joins.len());
2✔
555

556
    let primary = db
1✔
557
        .get_table(query.table_name.clone())
2✔
558
        .map_err(|_| SQLRiteError::Internal(format!("Table '{}' not found", query.table_name)))?;
1✔
559
    joined_tables.push(JoinedTableRef {
1✔
560
        table: primary,
561
        scope_name: query
1✔
562
            .table_alias
563
            .clone()
1✔
564
            .unwrap_or_else(|| query.table_name.clone()),
3✔
565
    });
566
    for j in &query.joins {
1✔
567
        let t = db
1✔
568
            .get_table(j.right_table.clone())
2✔
569
            .map_err(|_| SQLRiteError::Internal(format!("Table '{}' not found", j.right_table)))?;
1✔
570
        joined_tables.push(JoinedTableRef {
1✔
571
            table: t,
572
            scope_name: j
1✔
573
                .right_alias
574
                .clone()
1✔
575
                .unwrap_or_else(|| j.right_table.clone()),
3✔
576
        });
577
    }
578

579
    // Reject duplicate scope names — `FROM t JOIN t ON ...` without
580
    // an alias on one side would silently collapse qualifiers and
581
    // produce confusing results. Forcing the user to alias one side
582
    // keeps `t1.col` / `t2.col` unambiguous.
583
    {
584
        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
1✔
585
        for t in &joined_tables {
2✔
586
            let key = t.scope_name.to_ascii_lowercase();
2✔
587
            if !seen.insert(key) {
1✔
588
                return Err(SQLRiteError::Internal(format!(
1✔
589
                    "duplicate table reference '{}' in FROM/JOIN — use AS to alias one side",
590
                    t.scope_name
591
                )));
592
            }
593
        }
594
    }
595

596
    // Resolve each join's match constraint into a concrete ON predicate
597
    // (plus, for USING / NATURAL, the set of columns that `SELECT *`
598
    // shows once). This is done here rather than at parse time because
599
    // USING needs to know which side each named column lives on, and
600
    // NATURAL needs the schemas to discover the shared columns at all —
601
    // neither is available to the parser. `resolved[i]` lines up with
602
    // `query.joins[i]` (i.e. `joined_tables[i + 1]`).
603
    let resolved: Vec<ResolvedJoin> = query
4✔
604
        .joins
605
        .iter()
1✔
606
        .enumerate()
1✔
607
        .map(|(j_idx, join)| resolve_join_constraint(&join.constraint, &joined_tables, j_idx + 1))
3✔
608
        .collect::<Result<Vec<_>>>()?;
2✔
609

610
    // Validate qualified projection column references against the
611
    // table they qualify. Unqualified names are validated by the
612
    // first scope lookup at row materialization — the runtime check
613
    // there gives the same "ambiguous / unknown" message we'd want
614
    // here, so we don't pre-resolve them.
615
    let proj_items: Vec<ProjectionItem> = match &query.projection {
1✔
616
        Projection::All => {
617
            // `SELECT *` over a join expands to every column of every
618
            // in-scope table, in source order. We use the bare column
619
            // name as both the projected identifier and the output
620
            // header — qualified expansion (`t1.col`) would force
621
            // composite headers like `t1.col` which conflict with
622
            // alias-less convention. Duplicate header names are
623
            // permitted (matches SQLite); callers needing
624
            // disambiguation can `SELECT t.col AS t_col`.
625
            //
626
            // USING / NATURAL columns are the exception: SQLite shows a
627
            // joined-on column once, taking the left side's copy and
628
            // omitting the right side's. We honor that by skipping any
629
            // column listed in the right table's `using_columns` when we
630
            // reach that table during expansion. (The left copy was
631
            // already emitted by an earlier table.)
632
            let mut all = Vec::new();
1✔
633
            for (t_idx, t) in joined_tables.iter().enumerate() {
2✔
634
                // `t_idx == 0` is the primary table (no incoming join);
635
                // every later table corresponds to `resolved[t_idx - 1]`.
636
                let dedup: &[String] = t_idx
1✔
637
                    .checked_sub(1)
638
                    .map(|r| resolved[r].using_columns.as_slice())
3✔
639
                    .unwrap_or(&[]);
1✔
640
                for col in t.table.column_names() {
3✔
641
                    if dedup.contains(&col) {
2✔
642
                        continue;
643
                    }
644
                    all.push(ProjectionItem {
1✔
645
                        kind: ProjectionKind::Column {
1✔
646
                            // Qualify the synthetic items so duplicate
647
                            // column names across tables route to the
648
                            // right side at projection time. The output
649
                            // header still uses the bare `name`.
650
                            qualifier: Some(t.scope_name.clone()),
2✔
651
                            name: col,
1✔
652
                        },
653
                        alias: None,
1✔
654
                    });
655
                }
656
            }
657
            all
1✔
658
        }
659
        Projection::Items(items) => items.clone(),
2✔
660
    };
661

662
    let columns: Vec<String> = proj_items.iter().map(|i| i.output_name()).collect();
4✔
663

664
    // Stage 1: enumerate rows of the leading table. The accumulator
665
    // is `Vec<Vec<Option<i64>>>` where each inner `Vec` is a join
666
    // row whose i-th slot is the rowid of `joined_tables[i]` (or
667
    // None for a NULL-padded row from an outer join).
668
    let mut acc: Vec<Vec<Option<i64>>> = primary
669
        .rowids()
670
        .into_iter()
671
        .map(|r| {
3✔
672
            let mut row = Vec::with_capacity(joined_tables.len());
1✔
673
            row.push(Some(r));
1✔
674
            row
1✔
675
        })
676
        .collect();
677

678
    // Stage 2: fold each JOIN clause into the accumulator. After
679
    // join `i`, every row in `acc` has length `i + 2` (primary +
680
    // i+1 right tables joined). Unmatched-side handling depends on
681
    // the join flavor.
682
    for (j_idx, join) in query.joins.iter().enumerate() {
2✔
683
        let right_pos = j_idx + 1;
2✔
684
        let right_table = joined_tables[right_pos].table;
2✔
685
        let right_rowids: Vec<i64> = right_table.rowids();
1✔
686

687
        // Track which right rowids matched at least once across the
688
        // entire left accumulator. Used by RIGHT / FULL to emit
689
        // unmatched right rows after the loop.
690
        let mut right_matched: Vec<bool> = vec![false; right_rowids.len()];
2✔
691

692
        let mut next_acc: Vec<Vec<Option<i64>>> = Vec::with_capacity(acc.len());
2✔
693

694
        // ON evaluation only sees tables that are in scope *at this
695
        // join level* — the leading FROM table plus every right
696
        // table joined so far, including the one we're matching.
697
        // Restricting the scope means a typo like `JOIN c ON a.id =
698
        // c.id JOIN c ON ...` (referencing `c` before it joins)
699
        // surfaces as "unknown table qualifier 'c'" rather than
700
        // silently `NULL → false`-ing every row.
701
        let on_scope_tables: &[JoinedTableRef<'_>] = &joined_tables[..=right_pos];
2✔
702

703
        for left_row in acc.into_iter() {
3✔
704
            // Build a row prefix and extend it with each candidate
705
            // right rowid; record whether any matched (for outer
706
            // padding on the left side).
707
            let mut left_match_count = 0usize;
1✔
708
            for (r_idx, &rrid) in right_rowids.iter().enumerate() {
3✔
709
                let mut on_rowids: Vec<Option<i64>> = left_row.clone();
2✔
710
                on_rowids.push(Some(rrid));
1✔
711
                debug_assert_eq!(on_rowids.len(), on_scope_tables.len());
1✔
712
                let scope = JoinedScope {
713
                    tables: on_scope_tables,
714
                    rowids: &on_rowids,
1✔
715
                };
716
                // Reuse `eval_predicate_scope` so ON shares the same
717
                // truthiness rule WHERE uses — non-zero integers are
718
                // truthy, NULL is false, etc. — instead of rejecting
719
                // anything that isn't a literal bool. `resolved[j_idx].on`
720
                // is the user's ON expr, or the equality we synthesized
721
                // for USING / NATURAL.
722
                if eval_predicate_scope(&resolved[j_idx].on, &scope)? {
1✔
723
                    left_match_count += 1;
1✔
724
                    right_matched[r_idx] = true;
2✔
725
                    // Accumulator entries carry only as many slots
726
                    // as join levels processed so far; the next
727
                    // iteration extends them again. No trailing
728
                    // padding needed here.
729
                    next_acc.push(on_rowids);
1✔
730
                }
731
            }
732

733
            if left_match_count == 0
2✔
734
                && matches!(join.join_type, JoinType::LeftOuter | JoinType::FullOuter)
1✔
735
            {
736
                // Outer-join NULL pad on the right side: keep the
737
                // left row, push None for the right rowid.
738
                let mut padded = left_row;
1✔
739
                padded.push(None);
1✔
740
                next_acc.push(padded);
1✔
741
            }
742
        }
743

744
        // Right-only emission for RIGHT / FULL: any right rowid that
745
        // never matched on the entire accumulator surfaces with all
746
        // left positions NULL-padded.
747
        if matches!(join.join_type, JoinType::RightOuter | JoinType::FullOuter) {
1✔
748
            for (r_idx, matched) in right_matched.iter().enumerate() {
2✔
749
                if *matched {
1✔
750
                    continue;
751
                }
752
                let mut row: Vec<Option<i64>> = vec![None; right_pos];
1✔
753
                row.push(Some(right_rowids[r_idx]));
2✔
754
                next_acc.push(row);
1✔
755
            }
756
        }
757

758
        acc = next_acc;
1✔
759
    }
760

761
    // Stage 3: apply WHERE on each fully-joined row. Outer-join
762
    // NULL-padded rows where WHERE references a NULL'd column will
763
    // (per SQL three-valued logic) be excluded — this is the same
764
    // posture as the single-table path.
765
    let mut filtered: Vec<Vec<Option<i64>>> = if let Some(where_expr) = &query.selection {
2✔
766
        let mut out = Vec::with_capacity(acc.len());
2✔
767
        for row in acc {
4✔
768
            let scope = JoinedScope {
769
                tables: &joined_tables,
1✔
770
                rowids: &row,
1✔
771
            };
772
            if eval_predicate_scope(where_expr, &scope)? {
1✔
773
                out.push(row);
1✔
774
            }
775
        }
776
        out
1✔
777
    } else {
778
        acc
1✔
779
    };
780

781
    // Stage 3.5 — SQLR-6: aggregation over the joined row stream. The
782
    // fully-joined, WHERE-filtered rows are just another row source
783
    // for the shared grouping accumulator: each joined row becomes a
784
    // `JoinedScope` and feeds the same pipeline the single-table path
785
    // uses (grouping, HAVING, DISTINCT, output-row ORDER BY, LIMIT).
786
    // NULL-padded outer-join rows group under a NULL key and are
787
    // skipped by `COUNT(col)` like any other NULL.
788
    let has_aggregates = proj_items
3✔
789
        .iter()
790
        .any(|i| matches!(i.kind, ProjectionKind::Aggregate(_)));
3✔
791
    if has_aggregates || !query.group_by.is_empty() {
2✔
792
        let (all_items, having_expr) = lower_having_into_hidden_slots(&query, &proj_items)?;
2✔
793

794
        // Validate every column reference against the joined scope up
795
        // front: GROUP BY keys and aggregate args must resolve to
796
        // exactly one in-scope table, and every bare projection column
797
        // must resolve to the same table+column as some GROUP BY key
798
        // (the joined-scope equivalent of the parser's single-table
799
        // "must appear in GROUP BY" check).
800
        for g in &query.group_by {
3✔
801
            resolve_scope_column(&joined_tables, g.qualifier.as_deref(), &g.name)?;
3✔
802
        }
803
        for item in &all_items {
1✔
804
            match &item.kind {
1✔
805
                ProjectionKind::Aggregate(call) => {
1✔
806
                    if let AggregateArg::Column { qualifier, name } = &call.arg {
3✔
807
                        resolve_scope_column(&joined_tables, qualifier.as_deref(), name)?;
1✔
808
                    }
809
                }
810
                ProjectionKind::Column { qualifier, name } => {
1✔
811
                    let pos = resolve_scope_column(&joined_tables, qualifier.as_deref(), name)?;
1✔
812
                    let in_group_by = query.group_by.iter().any(|g| {
2✔
813
                        g.name == *name
1✔
814
                            && resolve_scope_column(&joined_tables, g.qualifier.as_deref(), &g.name)
3✔
815
                                == Ok(pos)
2✔
816
                    });
817
                    if !in_group_by {
1✔
818
                        return Err(SQLRiteError::Internal(format!(
1✔
819
                            "column '{name}' must appear in GROUP BY or be used in an \
820
                             aggregate function"
821
                        )));
822
                    }
823
                }
824
            }
825
        }
826

827
        let scopes = filtered.iter().map(|row| JoinedScope {
3✔
828
            tables: &joined_tables,
1✔
829
            rowids: row,
1✔
830
        });
831
        return run_aggregation_pipeline(scopes, &query, &proj_items, &all_items, &having_expr);
1✔
832
    }
833

834
    // Stage 4: ORDER BY across the joined scope. We pre-compute the
835
    // sort key per row (same approach as `sort_rowids`) so the
836
    // comparator runs on Values, not against the expression tree.
837
    if let Some(order) = &query.order_by {
2✔
838
        // Validate up front so a bad ORDER BY surfaces a clear
839
        // error before sort starts.
840
        let mut keys: Vec<(usize, Value)> = Vec::with_capacity(filtered.len());
2✔
841
        for (i, row) in filtered.iter().enumerate() {
3✔
842
            let scope = JoinedScope {
843
                tables: &joined_tables,
1✔
844
                rowids: row,
845
            };
846
            let v = eval_expr_scope(&order.expr, &scope)?;
1✔
847
            keys.push((i, v));
1✔
848
        }
849
        keys.sort_by(|(_, a), (_, b)| {
3✔
850
            let ord = compare_values(Some(a), Some(b));
1✔
851
            if order.ascending { ord } else { ord.reverse() }
1✔
852
        });
853
        let mut sorted = Vec::with_capacity(filtered.len());
1✔
854
        for (i, _) in keys {
3✔
855
            sorted.push(filtered[i].clone());
2✔
856
        }
857
        filtered = sorted;
1✔
858
    }
859

860
    // Stage 5: LIMIT. SQLR-6 — when DISTINCT is on, truncating the
861
    // joined rows here would over-truncate (duplicates collapse later),
862
    // so the limit is deferred past the dedupe step, mirroring the
863
    // single-table path.
864
    if let Some(k) = query.limit
2✔
865
        && !query.distinct
1✔
866
    {
867
        filtered.truncate(k);
1✔
868
    }
869

870
    // Stage 6: project. For each row, evaluate every projection item
871
    // through the joined scope.
872
    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(filtered.len());
2✔
873
    for row in &filtered {
3✔
874
        let scope = JoinedScope {
875
            tables: &joined_tables,
1✔
876
            rowids: row,
877
        };
878
        let mut out_row = Vec::with_capacity(proj_items.len());
1✔
879
        for item in &proj_items {
2✔
880
            let v = match &item.kind {
1✔
881
                ProjectionKind::Column { qualifier, name } => {
1✔
882
                    scope.lookup(qualifier.as_deref(), name)?
2✔
883
                }
884
                ProjectionKind::Aggregate(_) => {
885
                    // Aggregates are handled by the Stage 3.5 pipeline,
886
                    // which returns before reaching this projection —
887
                    // defense in depth keeps the pattern match total.
888
                    return Err(SQLRiteError::Internal(
×
889
                        "aggregate projection reached the non-aggregating join path".to_string(),
×
890
                    ));
891
                }
892
            };
893
            out_row.push(v);
1✔
894
        }
895
        rows.push(out_row);
1✔
896
    }
897

898
    // SQLR-6 — SELECT DISTINCT over a join: dedupe the projected
899
    // output rows, then apply the LIMIT that Stage 5 deferred.
900
    if query.distinct {
1✔
901
        rows = dedupe_rows(rows);
1✔
902
        if let Some(k) = query.limit {
1✔
903
            rows.truncate(k);
1✔
904
        }
905
    }
906

907
    Ok(SelectResult { columns, rows })
1✔
908
}
909

910
/// Resolve an optionally-qualified column reference to the index of
911
/// the in-scope joined table that owns it. Schema-only counterpart of
912
/// [`JoinedScope::lookup`]: a qualified reference must name a known
913
/// scope and the column must exist there; an unqualified reference
914
/// must exist on exactly one in-scope table (zero → unknown column,
915
/// several → ambiguous).
916
fn resolve_scope_column(
1✔
917
    tables: &[JoinedTableRef<'_>],
918
    qualifier: Option<&str>,
919
    name: &str,
920
) -> Result<usize> {
921
    if let Some(q) = qualifier {
1✔
922
        let pos = tables
1✔
923
            .iter()
1✔
924
            .position(|t| t.scope_name.eq_ignore_ascii_case(q))
3✔
925
            .ok_or_else(|| {
1✔
926
                SQLRiteError::Internal(format!(
×
927
                    "unknown table qualifier '{q}' in column reference '{q}.{name}'"
928
                ))
929
            })?;
930
        if !tables[pos].table.contains_column(name.to_string()) {
1✔
931
            return Err(SQLRiteError::Internal(format!(
×
932
                "column '{name}' does not exist on '{}'",
933
                tables[pos].scope_name
×
934
            )));
935
        }
936
        return Ok(pos);
1✔
937
    }
938
    let mut hit: Option<usize> = None;
1✔
939
    for (i, t) in tables.iter().enumerate() {
2✔
940
        if t.table.contains_column(name.to_string()) {
2✔
941
            if hit.is_some() {
1✔
942
                return Err(SQLRiteError::Internal(format!(
1✔
943
                    "column reference '{name}' is ambiguous — qualify it as <table>.{name}"
944
                )));
945
            }
946
            hit = Some(i);
1✔
947
        }
948
    }
949
    hit.ok_or_else(|| {
×
950
        SQLRiteError::Internal(format!(
×
951
            "unknown column '{name}' in joined SELECT (no in-scope table has it)"
952
        ))
953
    })
954
}
955

956
/// Executes a SELECT and returns `(rendered_table, row_count)`. The
957
/// REPL and Tauri app use this to keep the table-printing behaviour
958
/// the engine has always shipped. Structured callers use
959
/// `execute_select_rows` instead.
960
pub fn execute_select(query: SelectQuery, db: &Database) -> Result<(String, usize)> {
1✔
961
    let result = execute_select_rows(query, db)?;
1✔
962
    let row_count = result.rows.len();
2✔
963

964
    let mut print_table = PrintTable::new();
1✔
965
    let header_cells: Vec<PrintCell> = result.columns.iter().map(|c| PrintCell::new(c)).collect();
4✔
966
    print_table.add_row(PrintRow::new(header_cells));
1✔
967

968
    for row in &result.rows {
1✔
969
        let cells: Vec<PrintCell> = row
1✔
970
            .iter()
971
            .map(|v| PrintCell::new(&v.to_display_string()))
3✔
972
            .collect();
973
        print_table.add_row(PrintRow::new(cells));
1✔
974
    }
975

976
    Ok((print_table.to_string(), row_count))
1✔
977
}
978

979
/// Executes a DELETE statement. Returns the number of rows removed.
980
pub fn execute_delete(stmt: &Statement, db: &mut Database) -> Result<usize> {
1✔
981
    let Statement::Delete(Delete {
1✔
982
        from, selection, ..
1✔
983
    }) = stmt
1✔
984
    else {
985
        return Err(SQLRiteError::Internal(
×
986
            "execute_delete called on a non-DELETE statement".to_string(),
×
987
        ));
988
    };
989

990
    let tables = match from {
1✔
991
        FromTable::WithFromKeyword(t) | FromTable::WithoutKeyword(t) => t,
2✔
992
    };
993
    let table_name = extract_single_table_name(tables)?;
1✔
994

995
    // Compute matching rowids with an immutable borrow, then mutate.
996
    let matching: Vec<i64> = {
997
        let table = db
1✔
998
            .get_table(table_name.clone())
2✔
999
            .map_err(|_| SQLRiteError::Internal(format!("Table '{table_name}' not found")))?;
1✔
1000
        match select_rowids(table, selection.as_ref())? {
1✔
1001
            RowidSource::IndexProbe(rowids) => rowids,
1✔
1002
            RowidSource::FullScan => {
1003
                let mut out = Vec::new();
1✔
1004
                for rowid in table.rowids() {
3✔
1005
                    if let Some(expr) = selection {
2✔
1006
                        if !eval_predicate(expr, table, rowid)? {
2✔
1007
                            continue;
1008
                        }
1009
                    }
1010
                    out.push(rowid);
2✔
1011
                }
1012
                out
1✔
1013
            }
1014
        }
1015
    };
1016

1017
    let table = db.get_table_mut(table_name)?;
2✔
1018
    for rowid in &matching {
1✔
1019
        table.delete_row(*rowid);
2✔
1020
    }
1021
    // Phase 7d.3 — any DELETE invalidates every HNSW index on this
1022
    // table (the deleted node could still appear in other nodes'
1023
    // neighbor lists, breaking subsequent searches). Mark dirty so
1024
    // the next save rebuilds from current rows before serializing.
1025
    //
1026
    // Phase 8b — same posture for FTS indexes (Q7 — rebuild-on-save
1027
    // mirrors HNSW). The deleted rowid still appears in posting
1028
    // lists; leaving it would surface zombie hits in future queries.
1029
    if !matching.is_empty() {
1✔
1030
        for entry in &mut table.hnsw_indexes {
3✔
1031
            entry.needs_rebuild = true;
1✔
1032
        }
1033
        for entry in &mut table.fts_indexes {
2✔
1034
            entry.needs_rebuild = true;
1✔
1035
        }
1036
    }
1037
    Ok(matching.len())
2✔
1038
}
1039

1040
/// Executes an UPDATE statement. Returns the number of rows updated.
1041
pub fn execute_update(stmt: &Statement, db: &mut Database) -> Result<usize> {
1✔
1042
    let Statement::Update(Update {
1✔
1043
        table,
1✔
1044
        assignments,
1✔
1045
        from,
1✔
1046
        selection,
1✔
1047
        ..
1048
    }) = stmt
1✔
1049
    else {
1050
        return Err(SQLRiteError::Internal(
×
1051
            "execute_update called on a non-UPDATE statement".to_string(),
×
1052
        ));
1053
    };
1054

1055
    if from.is_some() {
1✔
1056
        return Err(SQLRiteError::NotImplemented(
×
1057
            "UPDATE ... FROM is not supported yet".to_string(),
×
1058
        ));
1059
    }
1060

1061
    let table_name = extract_table_name(table)?;
1✔
1062

1063
    // Resolve assignment targets to plain column names and verify they exist.
1064
    let mut parsed_assignments: Vec<(String, Expr)> = Vec::with_capacity(assignments.len());
2✔
1065
    {
1066
        let tbl = db
1✔
1067
            .get_table(table_name.clone())
2✔
1068
            .map_err(|_| SQLRiteError::Internal(format!("Table '{table_name}' not found")))?;
1✔
1069
        for a in assignments {
2✔
1070
            let col = match &a.target {
1✔
1071
                AssignmentTarget::ColumnName(name) => name
2✔
1072
                    .0
1073
                    .last()
1✔
1074
                    .map(|p| p.to_string())
3✔
1075
                    .ok_or_else(|| SQLRiteError::Internal("empty column name".to_string()))?,
1✔
1076
                AssignmentTarget::Tuple(_) => {
1077
                    return Err(SQLRiteError::NotImplemented(
×
1078
                        "tuple assignment targets are not supported".to_string(),
×
1079
                    ));
1080
                }
1081
            };
1082
            if !tbl.contains_column(col.clone()) {
2✔
1083
                return Err(SQLRiteError::Internal(format!(
×
1084
                    "UPDATE references unknown column '{col}'"
1085
                )));
1086
            }
1087
            parsed_assignments.push((col, a.value.clone()));
1✔
1088
        }
1089
    }
1090

1091
    // Gather matching rowids + the new values to write for each assignment, under
1092
    // an immutable borrow. Uses the index-probe fast path when the WHERE is
1093
    // `col = literal` on an indexed column.
1094
    let work: Vec<(i64, Vec<(String, Value)>)> = {
1095
        let tbl = db.get_table(table_name.clone())?;
1✔
1096
        let matched_rowids: Vec<i64> = match select_rowids(tbl, selection.as_ref())? {
1✔
1097
            RowidSource::IndexProbe(rowids) => rowids,
1✔
1098
            RowidSource::FullScan => {
1099
                let mut out = Vec::new();
1✔
1100
                for rowid in tbl.rowids() {
3✔
1101
                    if let Some(expr) = selection {
2✔
1102
                        if !eval_predicate(expr, tbl, rowid)? {
2✔
1103
                            continue;
1104
                        }
1105
                    }
1106
                    out.push(rowid);
2✔
1107
                }
1108
                out
1✔
1109
            }
1110
        };
1111
        let mut rows_to_update = Vec::new();
1✔
1112
        for rowid in matched_rowids {
4✔
1113
            let mut values = Vec::with_capacity(parsed_assignments.len());
2✔
1114
            for (col, expr) in &parsed_assignments {
3✔
1115
                // UPDATE's RHS is evaluated in the context of the row being updated,
1116
                // so column references on the right resolve to the current row's values.
1117
                let v = eval_expr(expr, tbl, rowid)?;
2✔
1118
                values.push((col.clone(), v));
2✔
1119
            }
1120
            rows_to_update.push((rowid, values));
1✔
1121
        }
1122
        rows_to_update
1✔
1123
    };
1124

1125
    let tbl = db.get_table_mut(table_name)?;
2✔
1126
    for (rowid, values) in &work {
1✔
1127
        for (col, v) in values {
2✔
1128
            tbl.set_value(col, *rowid, v.clone())?;
1✔
1129
        }
1130
    }
1131

1132
    // Phase 7d.3 — UPDATE may have changed a vector column that an
1133
    // HNSW index covers. Mark every covering index dirty so save
1134
    // rebuilds from current rows. (Updates that only touched
1135
    // non-vector columns also mark dirty, which is over-conservative
1136
    // but harmless — the rebuild walks rows anyway, and the cost is
1137
    // only paid on save.)
1138
    //
1139
    // Phase 8b — same shape for FTS indexes covering updated TEXT cols.
1140
    if !work.is_empty() {
1✔
1141
        let updated_columns: std::collections::HashSet<&str> = work
1✔
1142
            .iter()
1143
            .flat_map(|(_, values)| values.iter().map(|(c, _)| c.as_str()))
5✔
1144
            .collect();
1145
        for entry in &mut tbl.hnsw_indexes {
2✔
1146
            if updated_columns.contains(entry.column_name.as_str()) {
3✔
1147
                entry.needs_rebuild = true;
1✔
1148
            }
1149
        }
1150
        for entry in &mut tbl.fts_indexes {
1✔
1151
            if updated_columns.contains(entry.column_name.as_str()) {
3✔
1152
                entry.needs_rebuild = true;
1✔
1153
            }
1154
        }
1155
    }
1156
    Ok(work.len())
2✔
1157
}
1158

1159
/// Handles `CREATE INDEX [UNIQUE] <name> ON <table> [USING <method>] (<column>)`.
1160
/// Single-column indexes only.
1161
///
1162
/// Two flavours, branching on the optional `USING <method>` clause:
1163
///   - **No USING, or `USING btree`**: regular B-Tree secondary index
1164
///     (Phase 3e). Indexable types: Integer, Text.
1165
///   - **`USING hnsw`**: HNSW ANN index (Phase 7d.2). Indexable types:
1166
///     Vector(N) only. Distance metric is L2 by default; cosine and
1167
///     dot variants are deferred to Phase 7d.x.
1168
///
1169
/// Returns the (possibly synthesized) index name for the status message.
1170
pub fn execute_create_index(stmt: &Statement, db: &mut Database) -> Result<String> {
1✔
1171
    let Statement::CreateIndex(CreateIndex {
1✔
1172
        name,
1✔
1173
        table_name,
1✔
1174
        columns,
1✔
1175
        using,
1✔
1176
        unique,
1✔
1177
        if_not_exists,
1✔
1178
        predicate,
1✔
1179
        with,
1✔
1180
        ..
1181
    }) = stmt
1✔
1182
    else {
1183
        return Err(SQLRiteError::Internal(
×
1184
            "execute_create_index called on a non-CREATE-INDEX statement".to_string(),
×
1185
        ));
1186
    };
1187

1188
    if predicate.is_some() {
1✔
1189
        return Err(SQLRiteError::NotImplemented(
×
1190
            "partial indexes (CREATE INDEX ... WHERE) are not supported yet".to_string(),
×
1191
        ));
1192
    }
1193

1194
    if columns.len() != 1 {
1✔
1195
        return Err(SQLRiteError::NotImplemented(format!(
×
1196
            "multi-column indexes are not supported yet ({} columns given)",
1197
            columns.len()
×
1198
        )));
1199
    }
1200

1201
    let index_name = name.as_ref().map(|n| n.to_string()).ok_or_else(|| {
3✔
1202
        SQLRiteError::NotImplemented(
×
1203
            "anonymous CREATE INDEX (no name) is not supported — give it a name".to_string(),
×
1204
        )
1205
    })?;
1206

1207
    // Detect USING <method>. The `using` field on CreateIndex covers the
1208
    // pre-column form `CREATE INDEX … USING hnsw (col)`. (sqlparser also
1209
    // accepts a post-column form `… (col) USING hnsw` and parks that in
1210
    // `index_options`; we don't bother with it — the canonical form is
1211
    // pre-column and matches PG/pgvector convention.)
1212
    let method = match using {
1✔
1213
        Some(IndexType::Custom(ident)) if ident.value.eq_ignore_ascii_case("hnsw") => {
2✔
1214
            IndexMethod::Hnsw
1✔
1215
        }
1216
        Some(IndexType::Custom(ident)) if ident.value.eq_ignore_ascii_case("fts") => {
2✔
1217
            IndexMethod::Fts
1✔
1218
        }
1219
        Some(IndexType::Custom(ident)) if ident.value.eq_ignore_ascii_case("btree") => {
×
1220
            IndexMethod::Btree
×
1221
        }
1222
        Some(other) => {
×
1223
            return Err(SQLRiteError::NotImplemented(format!(
×
1224
                "CREATE INDEX … USING {other:?} is not supported \
1225
                 (try `hnsw`, `fts`, or no USING clause)"
1226
            )));
1227
        }
1228
        None => IndexMethod::Btree,
1✔
1229
    };
1230

1231
    // Parse `WITH (key = value, …)` options (SQLR-28). The only key
1232
    // recognized today is `metric` for HNSW indexes — `'l2'` /
1233
    // `'cosine'` / `'dot'`. The clause is rejected on non-HNSW indexes
1234
    // so a typo doesn't silently sit on a btree index where it can't
1235
    // do anything useful.
1236
    let hnsw_metric = parse_hnsw_with_options(with, &index_name, method)?;
3✔
1237

1238
    let table_name_str = table_name.to_string();
1✔
1239
    let column_name = match &columns[0].column.expr {
2✔
1240
        Expr::Identifier(ident) => ident.value.clone(),
2✔
1241
        Expr::CompoundIdentifier(parts) => parts
×
1242
            .last()
×
1243
            .map(|p| p.value.clone())
×
1244
            .ok_or_else(|| SQLRiteError::Internal("empty compound identifier".to_string()))?,
×
1245
        other => {
×
1246
            return Err(SQLRiteError::NotImplemented(format!(
×
1247
                "CREATE INDEX only supports simple column references, got {other:?}"
1248
            )));
1249
        }
1250
    };
1251

1252
    // Validate: table exists, column exists, type matches the index method,
1253
    // name is unique across both index kinds. Snapshot (rowid, value) pairs
1254
    // up front under the immutable borrow so the mutable attach later
1255
    // doesn't fight over `self`.
1256
    let (datatype, existing_rowids_and_values): (DataType, Vec<(i64, Value)>) = {
1✔
1257
        let table = db.get_table(table_name_str.clone()).map_err(|_| {
2✔
1258
            SQLRiteError::General(format!(
×
1259
                "CREATE INDEX references unknown table '{table_name_str}'"
1260
            ))
1261
        })?;
1262
        if !table.contains_column(column_name.clone()) {
1✔
1263
            return Err(SQLRiteError::General(format!(
×
1264
                "CREATE INDEX references unknown column '{column_name}' on table '{table_name_str}'"
1265
            )));
1266
        }
1267
        let col = table
3✔
1268
            .columns
1269
            .iter()
1270
            .find(|c| c.column_name == column_name)
3✔
1271
            .expect("we just verified the column exists");
1272

1273
        // Name uniqueness check spans ALL index kinds — btree, hnsw, and
1274
        // fts share one namespace per table.
1275
        if table.index_by_name(&index_name).is_some()
1✔
1276
            || table.hnsw_indexes.iter().any(|i| i.name == index_name)
4✔
1277
            || table.fts_indexes.iter().any(|i| i.name == index_name)
3✔
1278
        {
1279
            if *if_not_exists {
1✔
1280
                return Ok(index_name);
1✔
1281
            }
1282
            return Err(SQLRiteError::General(format!(
2✔
1283
                "index '{index_name}' already exists"
1284
            )));
1285
        }
1286
        let datatype = clone_datatype(&col.datatype);
1✔
1287

1288
        let mut pairs = Vec::new();
1✔
1289
        for rowid in table.rowids() {
3✔
1290
            if let Some(v) = table.get_value(&column_name, rowid) {
2✔
1291
                pairs.push((rowid, v));
1✔
1292
            }
1293
        }
1294
        (datatype, pairs)
1✔
1295
    };
1296

1297
    match method {
1✔
1298
        IndexMethod::Btree => create_btree_index(
1299
            db,
1300
            &table_name_str,
1✔
1301
            &index_name,
1✔
1302
            &column_name,
1✔
1303
            &datatype,
1304
            *unique,
1✔
1305
            &existing_rowids_and_values,
1✔
1306
        ),
1307
        IndexMethod::Hnsw => create_hnsw_index(
1308
            db,
1309
            &table_name_str,
1✔
1310
            &index_name,
1✔
1311
            &column_name,
1✔
1312
            &datatype,
1313
            *unique,
1✔
1314
            hnsw_metric.unwrap_or(DistanceMetric::L2),
1✔
1315
            &existing_rowids_and_values,
1✔
1316
        ),
1317
        IndexMethod::Fts => create_fts_index(
1318
            db,
1319
            &table_name_str,
1✔
1320
            &index_name,
1✔
1321
            &column_name,
1✔
1322
            &datatype,
1323
            *unique,
1✔
1324
            &existing_rowids_and_values,
1✔
1325
        ),
1326
    }
1327
}
1328

1329
/// Executes `DROP TABLE [IF EXISTS] <name>;`. Mirrors SQLite's single-target
1330
/// shape: sqlparser parses `DROP TABLE a, b` as one statement with
1331
/// `names: vec![a, b]`, but we reject the multi-target form to keep error
1332
/// semantics simple (no partial-failure rollback).
1333
///
1334
/// On success the table — and every index attached to it — disappears from
1335
/// the in-memory `Database`. The next auto-save rebuilds `sqlrite_master`
1336
/// from scratch and simply doesn't write a row for the dropped table or
1337
/// its indexes; pages previously occupied by them become orphans on disk
1338
/// (no free-list yet — file size doesn't shrink until a future VACUUM).
1339
pub fn execute_drop_table(
1✔
1340
    names: &[ObjectName],
1341
    if_exists: bool,
1342
    db: &mut Database,
1343
) -> Result<usize> {
1344
    if names.len() != 1 {
1✔
1345
        return Err(SQLRiteError::NotImplemented(
1✔
1346
            "DROP TABLE supports a single table per statement".to_string(),
1✔
1347
        ));
1348
    }
1349
    let name = names[0].to_string();
2✔
1350

1351
    if name == crate::sql::pager::MASTER_TABLE_NAME {
2✔
1352
        return Err(SQLRiteError::General(format!(
2✔
1353
            "'{}' is a reserved name used by the internal schema catalog",
1354
            crate::sql::pager::MASTER_TABLE_NAME
1355
        )));
1356
    }
1357

1358
    if !db.contains_table(name.clone()) {
2✔
1359
        return if if_exists {
2✔
1360
            Ok(0)
1✔
1361
        } else {
1362
            Err(SQLRiteError::General(format!(
2✔
1363
                "Table '{name}' does not exist"
1364
            )))
1365
        };
1366
    }
1367

1368
    db.tables.remove(&name);
2✔
1369
    Ok(1)
1370
}
1371

1372
/// Executes `DROP INDEX [IF EXISTS] <name>;`. The statement does not name a
1373
/// table, so we walk every table looking for the index across all three
1374
/// index families (B-Tree secondary, HNSW, FTS).
1375
///
1376
/// Refuses to drop auto-indexes (`origin == IndexOrigin::Auto`) — those are
1377
/// invariants of the table's PRIMARY KEY / UNIQUE constraints and should
1378
/// only disappear when the column or table they depend on is dropped.
1379
/// SQLite has the same rule for its `sqlite_autoindex_*` indexes.
1380
pub fn execute_drop_index(
1✔
1381
    names: &[ObjectName],
1382
    if_exists: bool,
1383
    db: &mut Database,
1384
) -> Result<usize> {
1385
    if names.len() != 1 {
1✔
1386
        return Err(SQLRiteError::NotImplemented(
×
1387
            "DROP INDEX supports a single index per statement".to_string(),
×
1388
        ));
1389
    }
1390
    let name = names[0].to_string();
2✔
1391

1392
    for table in db.tables.values_mut() {
2✔
1393
        if let Some(secondary) = table.secondary_indexes.iter().find(|i| i.name == name) {
4✔
1394
            if secondary.origin == IndexOrigin::Auto {
2✔
1395
                return Err(SQLRiteError::General(format!(
2✔
1396
                    "cannot drop auto-created index '{name}' (drop the column or table instead)"
1397
                )));
1398
            }
1399
            table.secondary_indexes.retain(|i| i.name != name);
3✔
1400
            return Ok(1);
1✔
1401
        }
1402
        if table.hnsw_indexes.iter().any(|i| i.name == name) {
×
1403
            table.hnsw_indexes.retain(|i| i.name != name);
×
1404
            return Ok(1);
×
1405
        }
1406
        if table.fts_indexes.iter().any(|i| i.name == name) {
×
1407
            table.fts_indexes.retain(|i| i.name != name);
×
1408
            return Ok(1);
×
1409
        }
1410
    }
1411

1412
    if if_exists {
2✔
1413
        Ok(0)
1414
    } else {
1415
        Err(SQLRiteError::General(format!(
2✔
1416
            "Index '{name}' does not exist"
1417
        )))
1418
    }
1419
}
1420

1421
/// Executes `ALTER TABLE [IF EXISTS] <name> <op>;` for one operation per
1422
/// statement. Supports four sub-operations matching SQLite:
1423
///
1424
///   - `RENAME TO <new>`
1425
///   - `RENAME COLUMN <old> TO <new>`
1426
///   - `ADD COLUMN <coldef>` (NOT NULL requires DEFAULT on a non-empty table;
1427
///     PK / UNIQUE constraints rejected — would need backfill + uniqueness)
1428
///   - `DROP COLUMN <name>` (refuses PK column and only-column)
1429
///
1430
/// Multi-operation ALTER (`ALTER TABLE foo RENAME TO bar, ADD COLUMN x ...`)
1431
/// is rejected; SQLite forbids it too.
1432
pub fn execute_alter_table(alter: AlterTable, db: &mut Database) -> Result<String> {
1✔
1433
    let table_name = alter.name.to_string();
1✔
1434

1435
    if table_name == crate::sql::pager::MASTER_TABLE_NAME {
2✔
1436
        return Err(SQLRiteError::General(format!(
×
1437
            "'{}' is a reserved name used by the internal schema catalog",
1438
            crate::sql::pager::MASTER_TABLE_NAME
1439
        )));
1440
    }
1441

1442
    if !db.contains_table(table_name.clone()) {
2✔
1443
        return if alter.if_exists {
2✔
1444
            Ok("ALTER TABLE: no-op (table does not exist)".to_string())
2✔
1445
        } else {
1446
            Err(SQLRiteError::General(format!(
2✔
1447
                "Table '{table_name}' does not exist"
1448
            )))
1449
        };
1450
    }
1451

1452
    if alter.operations.len() != 1 {
2✔
1453
        return Err(SQLRiteError::NotImplemented(
×
1454
            "ALTER TABLE supports one operation per statement".to_string(),
×
1455
        ));
1456
    }
1457

1458
    match &alter.operations[0] {
2✔
1459
        AlterTableOperation::RenameTable { table_name: kind } => {
1✔
1460
            let new_name = match kind {
1✔
1461
                RenameTableNameKind::To(name) => name.to_string(),
1✔
1462
                RenameTableNameKind::As(_) => {
1463
                    return Err(SQLRiteError::NotImplemented(
×
1464
                        "ALTER TABLE ... RENAME AS (MySQL-only) is not supported; use RENAME TO"
1465
                            .to_string(),
×
1466
                    ));
1467
                }
1468
            };
1469
            alter_rename_table(db, &table_name, &new_name)?;
2✔
1470
            Ok(format!(
1✔
1471
                "ALTER TABLE '{table_name}' RENAME TO '{new_name}' executed."
1472
            ))
1473
        }
1474
        AlterTableOperation::RenameColumn {
1475
            old_column_name,
1✔
1476
            new_column_name,
1✔
1477
        } => {
1478
            let old = old_column_name.value.clone();
1✔
1479
            let new = new_column_name.value.clone();
1✔
1480
            db.get_table_mut(table_name.clone())?
5✔
1481
                .rename_column(&old, &new)?;
2✔
1482
            Ok(format!(
1✔
1483
                "ALTER TABLE '{table_name}' RENAME COLUMN '{old}' TO '{new}' executed."
1484
            ))
1485
        }
1486
        AlterTableOperation::AddColumn {
1487
            column_def,
1✔
1488
            if_not_exists,
1✔
1489
            ..
1490
        } => {
1491
            let parsed = crate::sql::parser::create::parse_one_column(column_def)?;
2✔
1492
            let table = db.get_table_mut(table_name.clone())?;
2✔
1493
            if *if_not_exists && table.contains_column(parsed.name.clone()) {
1✔
1494
                return Ok(format!(
×
1495
                    "ALTER TABLE '{table_name}' ADD COLUMN: no-op (column '{}' already exists)",
1496
                    parsed.name
1497
                ));
1498
            }
1499
            let col_name = parsed.name.clone();
1✔
1500
            table.add_column(parsed)?;
2✔
1501
            Ok(format!(
1✔
1502
                "ALTER TABLE '{table_name}' ADD COLUMN '{col_name}' executed."
1503
            ))
1504
        }
1505
        AlterTableOperation::DropColumn {
1506
            column_names,
1✔
1507
            if_exists,
1✔
1508
            ..
1509
        } => {
1510
            if column_names.len() != 1 {
2✔
1511
                return Err(SQLRiteError::NotImplemented(
×
1512
                    "ALTER TABLE DROP COLUMN supports a single column per statement".to_string(),
×
1513
                ));
1514
            }
1515
            let col_name = column_names[0].value.clone();
2✔
1516
            let table = db.get_table_mut(table_name.clone())?;
2✔
1517
            if *if_exists && !table.contains_column(col_name.clone()) {
1✔
1518
                return Ok(format!(
×
1519
                    "ALTER TABLE '{table_name}' DROP COLUMN: no-op (column '{col_name}' does not exist)"
1520
                ));
1521
            }
1522
            table.drop_column(&col_name)?;
3✔
1523
            Ok(format!(
1✔
1524
                "ALTER TABLE '{table_name}' DROP COLUMN '{col_name}' executed."
1525
            ))
1526
        }
1527
        other => Err(SQLRiteError::NotImplemented(format!(
×
1528
            "ALTER TABLE operation {other:?} is not supported"
1529
        ))),
1530
    }
1531
}
1532

1533
/// Executes `VACUUM;` (SQLR-6). Compacts the database file: rewrites
1534
/// every live table, index, and the catalog contiguously from page 1,
1535
/// drops the freelist, and truncates the tail at the next checkpoint.
1536
///
1537
/// Refuses to run inside a transaction (would publish in-flight writes
1538
/// out of band); refuses on read-only databases (handled upstream by
1539
/// the read-only mutation gate); and is a no-op on in-memory databases
1540
/// (no file to compact). Bare `VACUUM;` only — non-default options
1541
/// (`FULL`, `REINDEX`, table targets, etc.) are rejected.
1542
pub fn execute_vacuum(db: &mut Database) -> Result<String> {
2✔
1543
    if db.in_transaction() {
1✔
1544
        return Err(SQLRiteError::General(
1✔
1545
            "VACUUM cannot run inside a transaction".to_string(),
1✔
1546
        ));
1547
    }
1548
    let path = match db.source_path.clone() {
1✔
1549
        Some(p) => p,
1✔
1550
        None => {
1551
            return Ok("VACUUM is a no-op for in-memory databases".to_string());
1✔
1552
        }
1553
    };
1554
    // Checkpoint before AND after VACUUM so the main-file size we report
1555
    // reflects only what VACUUM actually reclaimed — without the leading
1556
    // checkpoint, `size_before` would be the stale main-file snapshot
1557
    // (typically 2 pages) while WAL holds the live bytes, making the
1558
    // bytes-reclaimed delta meaningless.
1559
    if let Some(pager) = db.pager.as_mut() {
2✔
1560
        let _ = pager.checkpoint();
2✔
1561
    }
1562
    let size_before = std::fs::metadata(&path).ok().map(|m| m.len()).unwrap_or(0);
4✔
1563
    let pages_before = db
2✔
1564
        .pager
1565
        .as_ref()
1566
        .map(|p| p.header().page_count)
3✔
1567
        .unwrap_or(0);
1568
    crate::sql::pager::vacuum_database(db, &path)?;
1✔
1569
    // Second checkpoint so the main file shrinks now — VACUUM's whole
1570
    // purpose is to reclaim bytes, so paying the I/O up front is fair.
1571
    if let Some(pager) = db.pager.as_mut() {
1✔
1572
        let _ = pager.checkpoint();
2✔
1573
    }
1574
    let size_after = std::fs::metadata(&path).ok().map(|m| m.len()).unwrap_or(0);
4✔
1575
    let pages_after = db
2✔
1576
        .pager
1577
        .as_ref()
1578
        .map(|p| p.header().page_count)
3✔
1579
        .unwrap_or(0);
1580
    let pages_reclaimed = pages_before.saturating_sub(pages_after);
1✔
1581
    let bytes_reclaimed = size_before.saturating_sub(size_after);
1✔
1582
    Ok(format!(
1✔
1583
        "VACUUM completed. {pages_reclaimed} pages reclaimed ({bytes_reclaimed} bytes)."
1584
    ))
1585
}
1586

1587
/// Renames a table in `db.tables`. Updates `tb_name`, every secondary
1588
/// index's `table_name` field, and any auto-index whose name embedded
1589
/// the old table name. HNSW / FTS index entries don't carry a
1590
/// `table_name` field — they're addressed implicitly via the `Table`
1591
/// they live inside, so they move with the rename for free.
1592
fn alter_rename_table(db: &mut Database, old: &str, new: &str) -> Result<()> {
1✔
1593
    if new == crate::sql::pager::MASTER_TABLE_NAME {
1✔
1594
        return Err(SQLRiteError::General(format!(
1✔
1595
            "'{}' is a reserved name used by the internal schema catalog",
1596
            crate::sql::pager::MASTER_TABLE_NAME
1597
        )));
1598
    }
1599
    if old == new {
1✔
1600
        return Ok(());
×
1601
    }
1602
    if db.contains_table(new.to_string()) {
1✔
1603
        return Err(SQLRiteError::General(format!(
1✔
1604
            "target table '{new}' already exists"
1605
        )));
1606
    }
1607

1608
    let mut table = db
3✔
1609
        .tables
1610
        .remove(old)
1✔
1611
        .ok_or_else(|| SQLRiteError::General(format!("Table '{old}' does not exist")))?;
1✔
1612
    table.tb_name = new.to_string();
2✔
1613
    for idx in table.secondary_indexes.iter_mut() {
1✔
1614
        idx.table_name = new.to_string();
2✔
1615
        if idx.origin == IndexOrigin::Auto
2✔
1616
            && idx.name == SecondaryIndex::auto_name(old, &idx.column_name)
1✔
1617
        {
1618
            idx.name = SecondaryIndex::auto_name(new, &idx.column_name);
1✔
1619
        }
1620
    }
1621
    db.tables.insert(new.to_string(), table);
1✔
1622
    Ok(())
1✔
1623
}
1624

1625
/// `USING <method>` choices recognized by `execute_create_index`. A
1626
/// missing USING clause defaults to `Btree` so existing CREATE INDEX
1627
/// statements (Phase 3e) keep working unchanged.
1628
#[derive(Debug, Clone, Copy)]
1629
enum IndexMethod {
1630
    Btree,
1631
    Hnsw,
1632
    /// Phase 8b — full-text inverted index over a TEXT column.
1633
    Fts,
1634
}
1635

1636
/// Builds a Phase 3e B-Tree secondary index and attaches it to the table.
1637
fn create_btree_index(
1✔
1638
    db: &mut Database,
1639
    table_name: &str,
1640
    index_name: &str,
1641
    column_name: &str,
1642
    datatype: &DataType,
1643
    unique: bool,
1644
    existing: &[(i64, Value)],
1645
) -> Result<String> {
1646
    let mut idx = SecondaryIndex::new(
3✔
1647
        index_name.to_string(),
1✔
1648
        table_name.to_string(),
2✔
1649
        column_name.to_string(),
1✔
1650
        datatype,
1651
        unique,
1652
        IndexOrigin::Explicit,
1653
    )?;
1654

1655
    // Populate from existing rows. UNIQUE violations here mean the
1656
    // existing data already breaks the new index's constraint — a
1657
    // common source of user confusion, so be explicit.
1658
    for (rowid, v) in existing {
2✔
1659
        if unique && idx.would_violate_unique(v) {
2✔
1660
            return Err(SQLRiteError::General(format!(
1✔
1661
                "cannot create UNIQUE index '{index_name}': column '{column_name}' \
1662
                 already contains the duplicate value {}",
1663
                v.to_display_string()
1✔
1664
            )));
1665
        }
1666
        idx.insert(v, *rowid)?;
2✔
1667
    }
1668

1669
    let table_mut = db.get_table_mut(table_name.to_string())?;
1✔
1670
    table_mut.secondary_indexes.push(idx);
1✔
1671
    Ok(index_name.to_string())
1✔
1672
}
1673

1674
/// Builds a Phase 7d.2 HNSW index and attaches it to the table.
1675
fn create_hnsw_index(
1✔
1676
    db: &mut Database,
1677
    table_name: &str,
1678
    index_name: &str,
1679
    column_name: &str,
1680
    datatype: &DataType,
1681
    unique: bool,
1682
    metric: DistanceMetric,
1683
    existing: &[(i64, Value)],
1684
) -> Result<String> {
1685
    // HNSW only makes sense on VECTOR columns. Reject anything else
1686
    // with a clear message — this is the most likely user error.
1687
    let dim = match datatype {
1✔
1688
        DataType::Vector(d) => *d,
1✔
1689
        other => {
1✔
1690
            return Err(SQLRiteError::General(format!(
1✔
1691
                "USING hnsw requires a VECTOR column; '{column_name}' is {other}"
1692
            )));
1693
        }
1694
    };
1695

1696
    if unique {
1✔
1697
        return Err(SQLRiteError::General(
×
1698
            "UNIQUE has no meaning for HNSW indexes".to_string(),
×
1699
        ));
1700
    }
1701

1702
    // Build the in-memory graph. The distance metric was picked at
1703
    // CREATE INDEX time (defaults to L2 if no `WITH (metric = …)`
1704
    // clause was supplied). The graph topology is metric-specific —
1705
    // L2 neighbour pruning ≠ cosine neighbour pruning — so the
1706
    // optimizer's HNSW shortcut only fires when the query's
1707
    // `vec_distance_*` function matches this value (SQLR-28).
1708
    //
1709
    // Seed: hash the index name so different indexes get different
1710
    // graph topologies, but the same index always gets the same one
1711
    // — useful when debugging recall / index size.
1712
    let seed = hash_str_to_seed(index_name);
1✔
1713
    let mut idx = HnswIndex::new(metric, seed);
1✔
1714

1715
    // Snapshot the (rowid, vector) pairs into a side map so the
1716
    // get_vec closure below can serve them by id without re-borrowing
1717
    // the table (we're already holding `existing` — flatten it).
1718
    let mut vec_map: std::collections::HashMap<i64, Vec<f32>> =
1✔
1719
        std::collections::HashMap::with_capacity(existing.len());
1720
    for (rowid, v) in existing {
2✔
1721
        match v {
1✔
1722
            Value::Vector(vec) => {
1✔
1723
                if vec.len() != dim {
1✔
1724
                    return Err(SQLRiteError::Internal(format!(
×
1725
                        "row {rowid} stores a {}-dim vector in column '{column_name}' \
1726
                         declared as VECTOR({dim}) — schema invariant violated",
1727
                        vec.len()
×
1728
                    )));
1729
                }
1730
                vec_map.insert(*rowid, vec.clone());
2✔
1731
            }
1732
            // Non-vector values (theoretical NULL, type coercion bug)
1733
            // get skipped — they wouldn't have a sensible graph
1734
            // position anyway.
1735
            _ => continue,
1736
        }
1737
    }
1738

1739
    for (rowid, _) in existing {
1✔
1740
        if let Some(v) = vec_map.get(rowid) {
2✔
1741
            let v_clone = v.clone();
1✔
1742
            idx.insert(*rowid, &v_clone, |id| {
3✔
1743
                vec_map.get(&id).cloned().unwrap_or_default()
1✔
1744
            })?;
1745
        }
1746
    }
1747

1748
    let table_mut = db.get_table_mut(table_name.to_string())?;
1✔
1749
    table_mut.hnsw_indexes.push(HnswIndexEntry {
2✔
1750
        name: index_name.to_string(),
1✔
1751
        column_name: column_name.to_string(),
1✔
1752
        metric,
1753
        index: idx,
1✔
1754
        // Freshly built — no DELETE/UPDATE has invalidated it yet.
1755
        needs_rebuild: false,
1756
    });
1757
    Ok(index_name.to_string())
1✔
1758
}
1759

1760
/// Parses the `WITH (metric = '<name>', …)` options bag on a CREATE
1761
/// INDEX statement. Returns the chosen metric (or `None` if no
1762
/// `metric` key was supplied) on HNSW indexes; raises a
1763
/// user-visible error on:
1764
///
1765
///   - WITH options on a non-HNSW index (btree / fts have no knobs we
1766
///     understand here),
1767
///   - unknown option keys,
1768
///   - unknown metric names (typo guard — silently falling back to L2
1769
///     would hide the user's intent and re-introduce the SQLR-28 bug).
1770
fn parse_hnsw_with_options(
1✔
1771
    with: &[Expr],
1772
    index_name: &str,
1773
    method: IndexMethod,
1774
) -> Result<Option<DistanceMetric>> {
1775
    if with.is_empty() {
1✔
1776
        return Ok(None);
1✔
1777
    }
1778
    if !matches!(method, IndexMethod::Hnsw) {
2✔
1779
        return Err(SQLRiteError::General(format!(
1✔
1780
            "CREATE INDEX '{index_name}' has a WITH (...) clause but its index method \
1781
             doesn't support any options — only `USING hnsw` recognises `WITH (metric = ...)`"
1782
        )));
1783
    }
1784

1785
    let mut metric: Option<DistanceMetric> = None;
1✔
1786
    for opt in with {
2✔
1787
        let Expr::BinaryOp { left, op, right } = opt else {
2✔
1788
            return Err(SQLRiteError::General(format!(
×
1789
                "CREATE INDEX '{index_name}': unsupported WITH option {opt:?} \
1790
                 (expected `key = 'value'`)"
1791
            )));
1792
        };
1793
        if !matches!(op, BinaryOperator::Eq) {
2✔
1794
            return Err(SQLRiteError::General(format!(
×
1795
                "CREATE INDEX '{index_name}': WITH options must use `=` (got {op:?})"
1796
            )));
1797
        }
1798
        let key = match left.as_ref() {
1✔
1799
            Expr::Identifier(ident) => ident.value.clone(),
1✔
1800
            other => {
×
1801
                return Err(SQLRiteError::General(format!(
×
1802
                    "CREATE INDEX '{index_name}': WITH option key must be a bare identifier, \
1803
                     got {other:?}"
1804
                )));
1805
            }
1806
        };
1807
        let value = match right.as_ref() {
2✔
1808
            Expr::Value(v) => match &v.value {
1✔
1809
                AstValue::SingleQuotedString(s) => s.clone(),
2✔
1810
                AstValue::DoubleQuotedString(s) => s.clone(),
×
1811
                other => {
×
1812
                    return Err(SQLRiteError::General(format!(
×
1813
                        "CREATE INDEX '{index_name}': WITH option '{key}' value must be \
1814
                         a quoted string, got {other:?}"
1815
                    )));
1816
                }
1817
            },
1818
            Expr::Identifier(ident) => ident.value.clone(),
×
1819
            other => {
×
1820
                return Err(SQLRiteError::General(format!(
×
1821
                    "CREATE INDEX '{index_name}': WITH option '{key}' value must be a \
1822
                     quoted string, got {other:?}"
1823
                )));
1824
            }
1825
        };
1826

1827
        if key.eq_ignore_ascii_case("metric") {
2✔
1828
            let parsed = DistanceMetric::from_sql_name(&value).ok_or_else(|| {
5✔
1829
                SQLRiteError::General(format!(
1✔
1830
                    "CREATE INDEX '{index_name}': unknown HNSW metric '{value}' \
1831
                     (try 'l2', 'cosine', or 'dot')"
1832
                ))
1833
            })?;
1834
            if metric.is_some() {
1✔
1835
                return Err(SQLRiteError::General(format!(
×
1836
                    "CREATE INDEX '{index_name}': metric specified more than once in WITH (...)"
1837
                )));
1838
            }
1839
            metric = Some(parsed);
1✔
1840
        } else {
1841
            return Err(SQLRiteError::General(format!(
×
1842
                "CREATE INDEX '{index_name}': unknown WITH option '{key}' \
1843
                 (only 'metric' is recognised on HNSW indexes)"
1844
            )));
1845
        }
1846
    }
1847

1848
    Ok(metric)
1✔
1849
}
1850

1851
/// Builds a Phase 8b FTS inverted index and attaches it to the table.
1852
/// Mirrors [`create_hnsw_index`] in shape: validate column type,
1853
/// tokenize each existing row's text into the in-memory posting list,
1854
/// push an `FtsIndexEntry`.
1855
fn create_fts_index(
1✔
1856
    db: &mut Database,
1857
    table_name: &str,
1858
    index_name: &str,
1859
    column_name: &str,
1860
    datatype: &DataType,
1861
    unique: bool,
1862
    existing: &[(i64, Value)],
1863
) -> Result<String> {
1864
    // FTS is a TEXT-only feature for the MVP. JSON columns share the
1865
    // Row::Text storage but their content is structured — full-text
1866
    // indexing JSON keys + values would need a different design (and
1867
    // is out of scope per the Phase 8 plan's "Out of scope" section).
1868
    match datatype {
1✔
1869
        DataType::Text => {}
1870
        other => {
1✔
1871
            return Err(SQLRiteError::General(format!(
1✔
1872
                "USING fts requires a TEXT column; '{column_name}' is {other}"
1873
            )));
1874
        }
1875
    }
1876

1877
    if unique {
1✔
1878
        return Err(SQLRiteError::General(
1✔
1879
            "UNIQUE has no meaning for FTS indexes".to_string(),
1✔
1880
        ));
1881
    }
1882

1883
    let mut idx = PostingList::new();
1✔
1884
    for (rowid, v) in existing {
2✔
1885
        if let Value::Text(text) = v {
2✔
1886
            idx.insert(*rowid, text);
1✔
1887
        }
1888
        // Non-text values (Null, type coercion bugs) get skipped — same
1889
        // posture as create_hnsw_index for non-vector values.
1890
    }
1891

1892
    let table_mut = db.get_table_mut(table_name.to_string())?;
1✔
1893
    table_mut.fts_indexes.push(FtsIndexEntry {
2✔
1894
        name: index_name.to_string(),
1✔
1895
        column_name: column_name.to_string(),
1✔
1896
        index: idx,
1✔
1897
        needs_rebuild: false,
1898
    });
1899
    Ok(index_name.to_string())
1✔
1900
}
1901

1902
/// Stable, deterministic hash of a string into a u64 RNG seed. FNV-1a;
1903
/// avoids pulling in `std::hash::DefaultHasher` (which is randomized
1904
/// per process).
1905
fn hash_str_to_seed(s: &str) -> u64 {
1✔
1906
    let mut h: u64 = 0xCBF29CE484222325;
1✔
1907
    for b in s.as_bytes() {
2✔
1908
        h ^= *b as u64;
1✔
1909
        h = h.wrapping_mul(0x100000001B3);
1✔
1910
    }
1911
    h
1✔
1912
}
1913

1914
/// Cheap clone helper — `DataType` intentionally doesn't derive `Clone`
1915
/// because the enum has no ergonomic reason to be cloneable elsewhere.
1916
fn clone_datatype(dt: &DataType) -> DataType {
1✔
1917
    match dt {
1✔
1918
        DataType::Integer => DataType::Integer,
1✔
1919
        DataType::Text => DataType::Text,
1✔
1920
        DataType::Real => DataType::Real,
×
1921
        DataType::Bool => DataType::Bool,
×
1922
        DataType::Vector(dim) => DataType::Vector(*dim),
1✔
1923
        DataType::Json => DataType::Json,
×
1924
        DataType::None => DataType::None,
×
1925
        DataType::Invalid => DataType::Invalid,
×
1926
    }
1927
}
1928

1929
fn extract_single_table_name(tables: &[TableWithJoins]) -> Result<String> {
1✔
1930
    if tables.len() != 1 {
1✔
1931
        return Err(SQLRiteError::NotImplemented(
×
1932
            "multi-table DELETE is not supported yet".to_string(),
×
1933
        ));
1934
    }
1935
    extract_table_name(&tables[0])
2✔
1936
}
1937

1938
fn extract_table_name(twj: &TableWithJoins) -> Result<String> {
1✔
1939
    if !twj.joins.is_empty() {
1✔
1940
        return Err(SQLRiteError::NotImplemented(
×
1941
            "JOIN is not supported yet".to_string(),
×
1942
        ));
1943
    }
1944
    match &twj.relation {
1✔
1945
        TableFactor::Table { name, .. } => Ok(name.to_string()),
1✔
1946
        _ => Err(SQLRiteError::NotImplemented(
×
1947
            "only plain table references are supported".to_string(),
×
1948
        )),
1949
    }
1950
}
1951

1952
/// Tells the executor how to produce its candidate rowid list.
1953
enum RowidSource {
1954
    /// The WHERE was simple enough to probe a secondary index directly.
1955
    /// The `Vec` already contains exactly the rows the index matched;
1956
    /// no further WHERE evaluation is needed (the probe is precise).
1957
    IndexProbe(Vec<i64>),
1958
    /// No applicable index; caller falls back to walking `table.rowids()`
1959
    /// and evaluating the WHERE on each row.
1960
    FullScan,
1961
}
1962

1963
/// Try to satisfy `WHERE` with an index probe. Currently supports the
1964
/// simplest shape: a single `col = literal` (or `literal = col`) where
1965
/// `col` is on a secondary index. AND/OR/range predicates fall back to
1966
/// full scan — those can be layered on later without changing the caller.
1967
fn select_rowids(table: &Table, selection: Option<&Expr>) -> Result<RowidSource> {
1✔
1968
    let Some(expr) = selection else {
1✔
1969
        return Ok(RowidSource::FullScan);
1✔
1970
    };
1971
    let Some((col, literal)) = try_extract_equality(expr) else {
2✔
1972
        return Ok(RowidSource::FullScan);
1✔
1973
    };
1974
    let Some(idx) = table.index_for_column(&col) else {
2✔
1975
        return Ok(RowidSource::FullScan);
1✔
1976
    };
1977

1978
    // Convert the literal into a runtime Value. If the literal type doesn't
1979
    // match the column's index we still need correct semantics — evaluate
1980
    // the WHERE against every row. Fall back to full scan.
1981
    let literal_value = match convert_literal(&literal) {
2✔
1982
        Ok(v) => v,
1✔
1983
        Err(_) => return Ok(RowidSource::FullScan),
×
1984
    };
1985

1986
    // Index lookup returns the full list of rowids matching this equality
1987
    // predicate. For unique indexes that's at most one; for non-unique it
1988
    // can be many.
1989
    let mut rowids = idx.lookup(&literal_value);
1✔
1990
    rowids.sort_unstable();
2✔
1991
    Ok(RowidSource::IndexProbe(rowids))
1✔
1992
}
1993

1994
/// Recognizes `expr` as a simple equality on a column reference against a
1995
/// literal. Returns `(column_name, literal_value)` if the shape matches;
1996
/// `None` otherwise. Accepts both `col = literal` and `literal = col`.
1997
fn try_extract_equality(expr: &Expr) -> Option<(String, sqlparser::ast::Value)> {
1✔
1998
    // Peel off Nested parens so `WHERE (x = 1)` is recognized too.
1999
    let peeled = match expr {
1✔
2000
        Expr::Nested(inner) => inner.as_ref(),
1✔
2001
        other => other,
1✔
2002
    };
2003
    let Expr::BinaryOp { left, op, right } = peeled else {
1✔
2004
        return None;
1✔
2005
    };
2006
    if !matches!(op, BinaryOperator::Eq) {
2✔
2007
        return None;
1✔
2008
    }
2009
    let col_from = |e: &Expr| -> Option<String> {
1✔
2010
        match e {
1✔
2011
            Expr::Identifier(ident) => Some(ident.value.clone()),
1✔
2012
            Expr::CompoundIdentifier(parts) => parts.last().map(|p| p.value.clone()),
×
2013
            _ => None,
1✔
2014
        }
2015
    };
2016
    let literal_from = |e: &Expr| -> Option<sqlparser::ast::Value> {
1✔
2017
        if let Expr::Value(v) = e {
2✔
2018
            Some(v.value.clone())
1✔
2019
        } else {
2020
            None
1✔
2021
        }
2022
    };
2023
    if let (Some(c), Some(l)) = (col_from(left), literal_from(right)) {
3✔
2024
        return Some((c, l));
1✔
2025
    }
2026
    if let (Some(l), Some(c)) = (literal_from(left), col_from(right)) {
3✔
2027
        return Some((c, l));
1✔
2028
    }
2029
    None
1✔
2030
}
2031

2032
/// Recognizes the HNSW-probable query pattern and probes the graph
2033
/// if a matching index exists.
2034
///
2035
/// Looks for ORDER BY `vec_distance_<l2|cosine|dot>(<col>, <bracket-
2036
/// array literal>)` where the table has an HNSW index attached to
2037
/// `<col>` *built for that same distance metric*. On a match, returns
2038
/// the top-k rowids straight from the graph (O(log N)). On any miss —
2039
/// different function name, no matching index, query dimension wrong,
2040
/// metric mismatch, etc. — returns `None` and the caller falls through
2041
/// to the bounded-heap brute-force path (7c) or the full sort (7b),
2042
/// preserving correct results regardless of whether the HNSW pathway
2043
/// kicked in.
2044
///
2045
/// Caveats:
2046
/// - The index's metric and the query's `vec_distance_*` function must
2047
///   agree. An L2-built graph silently doesn't help cosine queries
2048
///   (different neighbour pruning policy → potentially different
2049
///   topology), so we don't pretend to.  Pick the metric at CREATE
2050
///   INDEX time via `WITH (metric = '<l2|cosine|dot>')` (SQLR-28).
2051
/// - Only ASCENDING order makes sense for "k nearest" — DESC ORDER BY
2052
///   `vec_distance_*(...) LIMIT k` would mean "k farthest", which isn't
2053
///   what the index is built for. We don't bother to detect
2054
///   `ascending == false` here; the optimizer just skips and the
2055
///   fallback path handles it correctly (slower).
2056
fn try_hnsw_probe(table: &Table, order_expr: &Expr, k: usize) -> Option<Vec<i64>> {
1✔
2057
    if k == 0 {
1✔
2058
        return None;
×
2059
    }
2060

2061
    // Pattern-match: order expr must be a function call
2062
    // vec_distance_<l2|cosine|dot>(a, b).
2063
    let func = match order_expr {
1✔
2064
        Expr::Function(f) => f,
1✔
2065
        _ => return None,
1✔
2066
    };
2067
    let fname = match func.name.0.as_slice() {
2✔
2068
        [ObjectNamePart::Identifier(ident)] => ident.value.to_lowercase(),
2✔
2069
        _ => return None,
×
2070
    };
2071
    let query_metric = match fname.as_str() {
2✔
2072
        "vec_distance_l2" => DistanceMetric::L2,
2✔
2073
        "vec_distance_cosine" => DistanceMetric::Cosine,
3✔
2074
        "vec_distance_dot" => DistanceMetric::Dot,
3✔
2075
        _ => return None,
1✔
2076
    };
2077

2078
    // Extract the two args as raw Exprs.
2079
    let arg_list = match &func.args {
1✔
2080
        FunctionArguments::List(l) => &l.args,
1✔
2081
        _ => return None,
×
2082
    };
2083
    if arg_list.len() != 2 {
2✔
2084
        return None;
×
2085
    }
2086
    let exprs: Vec<&Expr> = arg_list
1✔
2087
        .iter()
2088
        .filter_map(|a| match a {
3✔
2089
            FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => Some(e),
1✔
2090
            _ => None,
×
2091
        })
2092
        .collect();
2093
    if exprs.len() != 2 {
2✔
2094
        return None;
×
2095
    }
2096

2097
    // One arg must be a column reference (the indexed col); the other
2098
    // must be a bracket-array literal (the query vector). Try both
2099
    // orderings — pgvector's idiom puts the column on the left, but
2100
    // SQL is commutative for distance.
2101
    let (col_name, query_vec) = match identify_indexed_arg_and_literal(exprs[0], exprs[1]) {
3✔
2102
        Some(v) => v,
1✔
2103
        None => match identify_indexed_arg_and_literal(exprs[1], exprs[0]) {
×
2104
            Some(v) => v,
×
2105
            None => return None,
×
2106
        },
2107
    };
2108

2109
    // Find the HNSW index on this column AND with a matching metric.
2110
    // Multiple indexes on the same column are allowed in principle
2111
    // (cosine-built + L2-built), and a query picks whichever metric
2112
    // its `vec_distance_*` function names.
2113
    let entry = table
4✔
2114
        .hnsw_indexes
2115
        .iter()
1✔
2116
        .find(|e| e.column_name == col_name && e.metric == query_metric)?;
3✔
2117

2118
    // Dimension sanity check — the query vector must match the
2119
    // indexed column's declared dimension. If it doesn't, the brute-
2120
    // force fallback would also error at the vec_distance_l2 dim-check;
2121
    // returning None here lets that path produce the user-visible
2122
    // error message.
2123
    let declared_dim = match table.columns.iter().find(|c| c.column_name == col_name) {
3✔
2124
        Some(c) => match &c.datatype {
1✔
2125
            DataType::Vector(d) => *d,
1✔
2126
            _ => return None,
×
2127
        },
2128
        None => return None,
×
2129
    };
2130
    if query_vec.len() != declared_dim {
2✔
2131
        return None;
×
2132
    }
2133

2134
    // Probe the graph. Vectors are looked up from the table's row
2135
    // storage — a closure rather than a `&Table` so the algorithm
2136
    // module stays decoupled from the SQL types.
2137
    let column_for_closure = col_name.clone();
1✔
2138
    let table_ref = table;
2139
    let result = entry
1✔
2140
        .index
2141
        .search(&query_vec, k, |id| {
3✔
2142
            match table_ref.get_value(&column_for_closure, id) {
1✔
2143
                Some(Value::Vector(v)) => v,
1✔
2144
                _ => Vec::new(),
×
2145
            }
2146
        })
2147
        .ok()?;
1✔
2148
    Some(result)
1✔
2149
}
2150

2151
/// Phase 8b — FTS optimizer hook.
2152
///
2153
/// Recognizes `ORDER BY bm25_score(<col>, '<query>') DESC LIMIT <k>`
2154
/// and serves it from the FTS index instead of full-scanning. Returns
2155
/// `Some(rowids)` already sorted by descending BM25 (with rowid
2156
/// ascending as tie-break), or `None` to fall through to scalar eval.
2157
///
2158
/// **Known limitation (mirrors `try_hnsw_probe`).** This shortcut
2159
/// ignores any `WHERE` clause. The canonical FTS query has a
2160
/// `WHERE fts_match(<col>, '<q>')` predicate, which is implicitly
2161
/// satisfied by the probe results — so dropping it is harmless.
2162
/// Anything *else* in the WHERE (`AND status = 'published'`) gets
2163
/// silently skipped on the optimizer path. Per Phase 8 plan Q6 we
2164
/// match HNSW's posture here; a correctness-preserving multi-index
2165
/// composer is deferred.
2166
fn try_fts_probe(table: &Table, order_expr: &Expr, ascending: bool, k: usize) -> Option<Vec<i64>> {
1✔
2167
    if k == 0 || ascending {
1✔
2168
        // BM25 is "higher = better"; ASC ranking is almost certainly a
2169
        // user mistake. Fall through so the caller gets either an
2170
        // explicit error from scalar eval or the slow correct path.
2171
        return None;
1✔
2172
    }
2173

2174
    let func = match order_expr {
1✔
2175
        Expr::Function(f) => f,
1✔
2176
        _ => return None,
×
2177
    };
2178
    let fname = match func.name.0.as_slice() {
2✔
2179
        [ObjectNamePart::Identifier(ident)] => ident.value.to_lowercase(),
2✔
2180
        _ => return None,
×
2181
    };
2182
    if fname != "bm25_score" {
2✔
2183
        return None;
×
2184
    }
2185

2186
    let arg_list = match &func.args {
1✔
2187
        FunctionArguments::List(l) => &l.args,
1✔
2188
        _ => return None,
×
2189
    };
2190
    if arg_list.len() != 2 {
2✔
2191
        return None;
×
2192
    }
2193
    let exprs: Vec<&Expr> = arg_list
1✔
2194
        .iter()
2195
        .filter_map(|a| match a {
3✔
2196
            FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => Some(e),
1✔
2197
            _ => None,
×
2198
        })
2199
        .collect();
2200
    if exprs.len() != 2 {
2✔
2201
        return None;
×
2202
    }
2203

2204
    // Arg 0 must be a bare column identifier.
2205
    let col_name = match exprs[0] {
2✔
2206
        Expr::Identifier(ident) if ident.quote_style.is_none() => ident.value.clone(),
2✔
2207
        _ => return None,
×
2208
    };
2209

2210
    // Arg 1 must be a single-quoted string literal. Anything else
2211
    // (column reference, function call) requires per-row evaluation —
2212
    // we'd lose the whole point of the probe.
2213
    let query = match exprs[1] {
2✔
2214
        Expr::Value(v) => match &v.value {
1✔
2215
            AstValue::SingleQuotedString(s) => s.clone(),
1✔
2216
            _ => return None,
×
2217
        },
2218
        _ => return None,
×
2219
    };
2220

2221
    let entry = table
3✔
2222
        .fts_indexes
2223
        .iter()
1✔
2224
        .find(|e| e.column_name == col_name)?;
3✔
2225

2226
    let scored = entry.index.query(&query, &Bm25Params::default());
1✔
2227
    let mut out: Vec<i64> = scored.into_iter().map(|(id, _)| id).collect();
3✔
2228
    if out.len() > k {
2✔
2229
        out.truncate(k);
1✔
2230
    }
2231
    Some(out)
1✔
2232
}
2233

2234
/// Helper for `try_hnsw_probe`: given two function args, identify which
2235
/// one is a bare column identifier (the indexed column) and which is a
2236
/// bracket-array literal (the query vector). Returns
2237
/// `Some((column_name, query_vec))` on a match, `None` otherwise.
2238
fn identify_indexed_arg_and_literal(a: &Expr, b: &Expr) -> Option<(String, Vec<f32>)> {
1✔
2239
    let col_name = match a {
1✔
2240
        Expr::Identifier(ident) if ident.quote_style.is_none() => ident.value.clone(),
2✔
2241
        _ => return None,
×
2242
    };
2243
    let lit_str = match b {
1✔
2244
        Expr::Identifier(ident) if ident.quote_style == Some('[') => {
2✔
2245
            format!("[{}]", ident.value)
1✔
2246
        }
2247
        _ => return None,
×
2248
    };
2249
    let v = parse_vector_literal(&lit_str).ok()?;
2✔
2250
    Some((col_name, v))
1✔
2251
}
2252

2253
/// One entry in the bounded-heap top-k path. Holds a pre-evaluated
2254
/// sort key + the rowid it came from. The `asc` flag inverts `Ord`
2255
/// so a single `BinaryHeap<HeapEntry>` works for both ASC and DESC
2256
/// without wrapping in `std::cmp::Reverse` at the call site:
2257
///
2258
///   - ASC LIMIT k = "k smallest": natural Ord. Max-heap top is the
2259
///     largest currently kept; new items smaller than top displace.
2260
///   - DESC LIMIT k = "k largest": Ord reversed. Max-heap top is now
2261
///     the smallest currently kept (under reversed Ord, smallest
2262
///     looks largest); new items larger than top displace.
2263
///
2264
/// In both cases the displacement test reduces to "new entry < heap top".
2265
struct HeapEntry {
2266
    key: Value,
2267
    rowid: i64,
2268
    asc: bool,
2269
}
2270

2271
impl PartialEq for HeapEntry {
2272
    fn eq(&self, other: &Self) -> bool {
×
2273
        self.cmp(other) == Ordering::Equal
×
2274
    }
2275
}
2276

2277
impl Eq for HeapEntry {}
2278

2279
impl PartialOrd for HeapEntry {
2280
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1✔
2281
        Some(self.cmp(other))
1✔
2282
    }
2283
}
2284

2285
impl Ord for HeapEntry {
2286
    fn cmp(&self, other: &Self) -> Ordering {
1✔
2287
        let raw = compare_values(Some(&self.key), Some(&other.key));
1✔
2288
        if self.asc { raw } else { raw.reverse() }
1✔
2289
    }
2290
}
2291

2292
/// Bounded-heap top-k selection. Returns at most `k` rowids in the
2293
/// caller's desired order (ascending key for `order.ascending`,
2294
/// descending otherwise).
2295
///
2296
/// O(N log k) where N = `matching.len()`. Caller must check
2297
/// `k < matching.len()` for this to be a win — for k ≥ N the
2298
/// `sort_rowids` full-sort path is the same asymptotic cost without
2299
/// the heap overhead.
2300
fn select_topk(
1✔
2301
    matching: &[i64],
2302
    table: &Table,
2303
    order: &OrderByClause,
2304
    k: usize,
2305
) -> Result<Vec<i64>> {
2306
    use std::collections::BinaryHeap;
2307

2308
    if k == 0 || matching.is_empty() {
1✔
2309
        return Ok(Vec::new());
1✔
2310
    }
2311

2312
    let mut heap: BinaryHeap<HeapEntry> = BinaryHeap::with_capacity(k + 1);
1✔
2313

2314
    for &rowid in matching {
3✔
2315
        let key = eval_expr(&order.expr, table, rowid)?;
2✔
2316
        let entry = HeapEntry {
2317
            key,
2318
            rowid,
2319
            asc: order.ascending,
1✔
2320
        };
2321

2322
        if heap.len() < k {
2✔
2323
            heap.push(entry);
2✔
2324
        } else {
2325
            // peek() returns the largest under our direction-aware Ord
2326
            // — the worst entry currently kept. Displace it iff the
2327
            // new entry is "better" (i.e. compares Less).
2328
            if entry < *heap.peek().unwrap() {
2✔
2329
                heap.pop();
1✔
2330
                heap.push(entry);
1✔
2331
            }
2332
        }
2333
    }
2334

2335
    // `into_sorted_vec` returns ascending under our direction-aware Ord:
2336
    //   ASC: ascending by raw key (what we want)
2337
    //   DESC: ascending under reversed Ord = descending by raw key (what
2338
    //         we want for an ORDER BY DESC LIMIT k result)
2339
    Ok(heap
2✔
2340
        .into_sorted_vec()
1✔
2341
        .into_iter()
1✔
2342
        .map(|e| e.rowid)
3✔
2343
        .collect())
1✔
2344
}
2345

2346
fn sort_rowids(rowids: &mut [i64], table: &Table, order: &OrderByClause) -> Result<()> {
1✔
2347
    // Phase 7b: ORDER BY now accepts any expression (column ref,
2348
    // arithmetic, function call, …). Pre-compute the sort key for
2349
    // every rowid up front so the comparator is called O(N log N)
2350
    // times against pre-evaluated Values rather than re-evaluating
2351
    // the expression O(N log N) times. Not strictly necessary today,
2352
    // but vital once 7d's HNSW index lands and this same code path
2353
    // could be running tens of millions of distance computations.
2354
    let mut keys: Vec<(i64, Result<Value>)> = rowids
2✔
2355
        .iter()
2356
        .map(|r| (*r, eval_expr(&order.expr, table, *r)))
3✔
2357
        .collect();
2358

2359
    // Surface the FIRST evaluation error if any. We could be lazy
2360
    // and let sort_by encounter it, but `Ord::cmp` can't return a
2361
    // Result and we'd have to swallow errors silently.
2362
    for (_, k) in &keys {
2✔
2363
        if let Err(e) = k {
2✔
2364
            return Err(SQLRiteError::General(format!(
1✔
2365
                "ORDER BY expression failed: {e}"
2366
            )));
2367
        }
2368
    }
2369

2370
    keys.sort_by(|(_, ka), (_, kb)| {
3✔
2371
        // Both unwrap()s are safe — we just verified above that
2372
        // every key Result is Ok.
2373
        let va = ka.as_ref().unwrap();
1✔
2374
        let vb = kb.as_ref().unwrap();
1✔
2375
        let ord = compare_values(Some(va), Some(vb));
1✔
2376
        if order.ascending { ord } else { ord.reverse() }
1✔
2377
    });
2378

2379
    // Write the sorted rowids back into the caller's slice.
2380
    for (i, (rowid, _)) in keys.into_iter().enumerate() {
2✔
2381
        rowids[i] = rowid;
2✔
2382
    }
2383
    Ok(())
1✔
2384
}
2385

2386
fn compare_values(a: Option<&Value>, b: Option<&Value>) -> Ordering {
1✔
2387
    match (a, b) {
2✔
2388
        (None, None) => Ordering::Equal,
×
2389
        (None, _) => Ordering::Less,
×
2390
        (_, None) => Ordering::Greater,
×
2391
        (Some(a), Some(b)) => match (a, b) {
3✔
2392
            (Value::Null, Value::Null) => Ordering::Equal,
×
2393
            (Value::Null, _) => Ordering::Less,
1✔
2394
            (_, Value::Null) => Ordering::Greater,
×
2395
            (Value::Integer(x), Value::Integer(y)) => x.cmp(y),
1✔
2396
            (Value::Real(x), Value::Real(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
1✔
2397
            (Value::Integer(x), Value::Real(y)) => {
×
2398
                (*x as f64).partial_cmp(y).unwrap_or(Ordering::Equal)
×
2399
            }
2400
            (Value::Real(x), Value::Integer(y)) => {
×
2401
                x.partial_cmp(&(*y as f64)).unwrap_or(Ordering::Equal)
×
2402
            }
2403
            (Value::Text(x), Value::Text(y)) => x.cmp(y),
1✔
2404
            (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
×
2405
            // Cross-type fallback: stringify and compare; keeps ORDER BY total.
2406
            (x, y) => x.to_display_string().cmp(&y.to_display_string()),
×
2407
        },
2408
    }
2409
}
2410

2411
/// Returns `true` if the row at `rowid` matches the predicate expression.
2412
pub fn eval_predicate(expr: &Expr, table: &Table, rowid: i64) -> Result<bool> {
1✔
2413
    eval_predicate_scope(expr, &SingleTableScope::new(table, rowid))
1✔
2414
}
2415

2416
/// Scope-aware predicate evaluation. The single-table fast path wraps
2417
/// this with a [`SingleTableScope`]; the join executor wraps it with
2418
/// a [`JoinedScope`].
2419
pub(crate) fn eval_predicate_scope(expr: &Expr, scope: &dyn RowScope) -> Result<bool> {
1✔
2420
    let v = eval_expr_scope(expr, scope)?;
2✔
2421
    match v {
1✔
2422
        Value::Bool(b) => Ok(b),
1✔
2423
        Value::Null => Ok(false), // SQL NULL in a WHERE is treated as false
2424
        Value::Integer(i) => Ok(i != 0),
1✔
2425
        other => Err(SQLRiteError::Internal(format!(
×
2426
            "WHERE clause must evaluate to boolean, got {}",
2427
            other.to_display_string()
×
2428
        ))),
2429
    }
2430
}
2431

2432
/// Single-table convenience wrapper around [`eval_expr_scope`].
2433
fn eval_expr(expr: &Expr, table: &Table, rowid: i64) -> Result<Value> {
1✔
2434
    eval_expr_scope(expr, &SingleTableScope::new(table, rowid))
1✔
2435
}
2436

2437
fn eval_expr_scope(expr: &Expr, scope: &dyn RowScope) -> Result<Value> {
1✔
2438
    match expr {
1✔
2439
        Expr::Nested(inner) => eval_expr_scope(inner, scope),
2✔
2440

2441
        Expr::Identifier(ident) => {
1✔
2442
            // Phase 7b — sqlparser parses bracket-array literals like
2443
            // `[0.1, 0.2, 0.3]` as bracket-quoted identifiers (it inherits
2444
            // MSSQL `[name]` syntax). When we see `quote_style == Some('[')`
2445
            // in expression-evaluation position (SELECT projection, WHERE,
2446
            // ORDER BY, function args), parse the bracketed content as a
2447
            // vector literal so the rest of the executor can compare /
2448
            // distance-compute against it. Same trick the INSERT parser
2449
            // uses; the executor needed its own copy because expression
2450
            // eval runs on a different code path.
2451
            if ident.quote_style == Some('[') {
1✔
2452
                let raw = format!("[{}]", ident.value);
1✔
2453
                let v = parse_vector_literal(&raw)?;
2✔
2454
                return Ok(Value::Vector(v));
1✔
2455
            }
2456
            scope.lookup(None, &ident.value)
1✔
2457
        }
2458

2459
        Expr::CompoundIdentifier(parts) => {
1✔
2460
            // `qualifier.col` — single-table scope ignores the qualifier
2461
            // (legacy behavior). Joined scope dispatches to the table
2462
            // matching `qualifier`. The compound form must have at
2463
            // least two parts; deeper paths (`db.schema.t.col`) are
2464
            // not supported.
2465
            match parts.as_slice() {
1✔
2466
                [only] => scope.lookup(None, &only.value),
1✔
2467
                [q, c] => scope.lookup(Some(&q.value), &c.value),
2✔
2468
                _ => Err(SQLRiteError::NotImplemented(format!(
×
2469
                    "compound identifier with {} parts is not supported",
2470
                    parts.len()
×
2471
                ))),
2472
            }
2473
        }
2474

2475
        Expr::Value(v) => convert_literal(&v.value),
1✔
2476

2477
        Expr::UnaryOp { op, expr } => {
×
2478
            let inner = eval_expr_scope(expr, scope)?;
×
2479
            match op {
×
2480
                UnaryOperator::Not => match inner {
×
2481
                    Value::Bool(b) => Ok(Value::Bool(!b)),
×
2482
                    Value::Null => Ok(Value::Null),
×
2483
                    other => Err(SQLRiteError::Internal(format!(
×
2484
                        "NOT applied to non-boolean value: {}",
2485
                        other.to_display_string()
×
2486
                    ))),
2487
                },
2488
                UnaryOperator::Minus => match inner {
×
2489
                    Value::Integer(i) => Ok(Value::Integer(-i)),
×
2490
                    Value::Real(f) => Ok(Value::Real(-f)),
×
2491
                    Value::Null => Ok(Value::Null),
×
2492
                    other => Err(SQLRiteError::Internal(format!(
×
2493
                        "unary minus on non-numeric value: {}",
2494
                        other.to_display_string()
×
2495
                    ))),
2496
                },
2497
                UnaryOperator::Plus => Ok(inner),
×
2498
                other => Err(SQLRiteError::NotImplemented(format!(
×
2499
                    "unary operator {other:?} is not supported"
2500
                ))),
2501
            }
2502
        }
2503

2504
        Expr::BinaryOp { left, op, right } => match op {
1✔
2505
            BinaryOperator::And => {
2506
                let l = eval_expr_scope(left, scope)?;
2✔
2507
                let r = eval_expr_scope(right, scope)?;
2✔
2508
                Ok(Value::Bool(as_bool(&l)? && as_bool(&r)?))
3✔
2509
            }
2510
            BinaryOperator::Or => {
2511
                let l = eval_expr_scope(left, scope)?;
×
2512
                let r = eval_expr_scope(right, scope)?;
×
2513
                Ok(Value::Bool(as_bool(&l)? || as_bool(&r)?))
×
2514
            }
2515
            cmp @ (BinaryOperator::Eq
2516
            | BinaryOperator::NotEq
2517
            | BinaryOperator::Lt
2518
            | BinaryOperator::LtEq
2519
            | BinaryOperator::Gt
2520
            | BinaryOperator::GtEq) => {
2521
                let l = eval_expr_scope(left, scope)?;
2✔
2522
                let r = eval_expr_scope(right, scope)?;
3✔
2523
                // Any comparison involving NULL is unknown → false in a WHERE.
2524
                if matches!(l, Value::Null) || matches!(r, Value::Null) {
2✔
2525
                    return Ok(Value::Bool(false));
1✔
2526
                }
2527
                let ord = compare_values(Some(&l), Some(&r));
2✔
2528
                let result = match cmp {
1✔
2529
                    BinaryOperator::Eq => ord == Ordering::Equal,
2✔
2530
                    BinaryOperator::NotEq => ord != Ordering::Equal,
×
2531
                    BinaryOperator::Lt => ord == Ordering::Less,
2✔
2532
                    BinaryOperator::LtEq => ord != Ordering::Greater,
×
2533
                    BinaryOperator::Gt => ord == Ordering::Greater,
2✔
2534
                    BinaryOperator::GtEq => ord != Ordering::Less,
2✔
2535
                    _ => unreachable!(),
2536
                };
2537
                Ok(Value::Bool(result))
1✔
2538
            }
2539
            arith @ (BinaryOperator::Plus
2540
            | BinaryOperator::Minus
2541
            | BinaryOperator::Multiply
2542
            | BinaryOperator::Divide
2543
            | BinaryOperator::Modulo) => {
2544
                let l = eval_expr_scope(left, scope)?;
2✔
2545
                let r = eval_expr_scope(right, scope)?;
2✔
2546
                eval_arith(arith, &l, &r)
1✔
2547
            }
2548
            BinaryOperator::StringConcat => {
2549
                let l = eval_expr_scope(left, scope)?;
×
2550
                let r = eval_expr_scope(right, scope)?;
×
2551
                if matches!(l, Value::Null) || matches!(r, Value::Null) {
×
2552
                    return Ok(Value::Null);
×
2553
                }
2554
                Ok(Value::Text(format!(
×
2555
                    "{}{}",
2556
                    l.to_display_string(),
×
2557
                    r.to_display_string()
×
2558
                )))
2559
            }
2560
            other => Err(SQLRiteError::NotImplemented(format!(
×
2561
                "binary operator {other:?} is not supported yet"
2562
            ))),
2563
        },
2564

2565
        // SQLR-7 — `col IS NULL` / `col IS NOT NULL`. Identifier
2566
        // evaluation already maps a missing rowid in the column's
2567
        // BTreeMap to `Value::Null`, so this works uniformly for
2568
        // explicit NULL inserts, omitted columns, and (post-Phase 7e)
2569
        // legacy "Null"-sentinel TEXT cells. NULLs are never inserted
2570
        // into secondary / HNSW / FTS indexes, so an IS NULL probe
2571
        // correctly falls through to a full scan via `select_rowids`.
2572
        Expr::IsNull(inner) => {
1✔
2573
            let v = eval_expr_scope(inner, scope)?;
2✔
2574
            Ok(Value::Bool(matches!(v, Value::Null)))
1✔
2575
        }
2576
        Expr::IsNotNull(inner) => {
1✔
2577
            let v = eval_expr_scope(inner, scope)?;
2✔
2578
            Ok(Value::Bool(!matches!(v, Value::Null)))
1✔
2579
        }
2580

2581
        // SQLR-3 — LIKE / NOT LIKE / ILIKE. Pattern matching uses our
2582
        // own iterative two-pointer matcher (see `agg::like_match`).
2583
        // SQLite's default is case-insensitive ASCII; we follow that.
2584
        // ILIKE is also case-insensitive (a no-op switch here, but we
2585
        // keep the arm explicit so SQLite users typing ILIKE get the
2586
        // expected semantics rather than a NotImplemented).
2587
        Expr::Like {
2588
            negated,
1✔
2589
            any,
1✔
2590
            expr: lhs,
1✔
2591
            pattern,
1✔
2592
            escape_char,
1✔
2593
        } => eval_like(
2594
            scope,
2595
            *negated,
1✔
2596
            *any,
1✔
2597
            lhs,
2✔
2598
            pattern,
2✔
2599
            escape_char.as_ref(),
1✔
2600
            true,
2601
        ),
2602
        Expr::ILike {
2603
            negated,
×
2604
            any,
×
2605
            expr: lhs,
×
2606
            pattern,
×
2607
            escape_char,
×
2608
        } => eval_like(
2609
            scope,
2610
            *negated,
×
2611
            *any,
×
2612
            lhs,
×
2613
            pattern,
×
2614
            escape_char.as_ref(),
×
2615
            true,
2616
        ),
2617

2618
        // SQLR-3 — IN (list) / NOT IN (list). Subquery form is rejected.
2619
        // Three-valued logic: if the LHS is NULL, return NULL; if any
2620
        // list entry is NULL and no match was found, return NULL too.
2621
        // WHERE coerces NULL → false at line ~1494, so the practical
2622
        // effect is "row excluded" — matches SQLite.
2623
        Expr::InList {
2624
            expr: lhs,
1✔
2625
            list,
1✔
2626
            negated,
1✔
2627
        } => eval_in_list(scope, lhs, list, *negated),
2✔
2628
        Expr::InSubquery { .. } => Err(SQLRiteError::NotImplemented(
×
2629
            "IN (subquery) is not supported (only literal lists are)".to_string(),
×
2630
        )),
2631

2632
        // Phase 7b — function-call dispatch. Currently only the three
2633
        // vector-distance functions; this match arm becomes the single
2634
        // place to register more SQL functions later (e.g. abs(),
2635
        // length(), …) without re-touching the rest of the executor.
2636
        //
2637
        // Operator forms (`<->` `<=>` `<#>`) are NOT plumbed here: two
2638
        // of three don't parse natively in sqlparser (we'd need a
2639
        // string-preprocessing pass or a sqlparser fork). Deferred to
2640
        // a follow-up sub-phase; see docs/phase-7-plan.md's "Scope
2641
        // corrections" note.
2642
        Expr::Function(func) => eval_function(func, scope),
1✔
2643

2644
        other => Err(SQLRiteError::NotImplemented(format!(
×
2645
            "unsupported expression in WHERE/projection: {other:?}"
2646
        ))),
2647
    }
2648
}
2649

2650
/// Dispatches an `Expr::Function` to its built-in implementation.
2651
/// Currently only the three vec_distance_* functions; other functions
2652
/// surface as `NotImplemented` errors with the function name in the
2653
/// message so users see what they tried.
2654
fn eval_function(func: &sqlparser::ast::Function, scope: &dyn RowScope) -> Result<Value> {
1✔
2655
    // Function name lives in `name.0[0]` for unqualified calls. Anything
2656
    // qualified (e.g. `pkg.fn(...)`) falls through to NotImplemented.
2657
    let name = match func.name.0.as_slice() {
2✔
2658
        [ObjectNamePart::Identifier(ident)] => ident.value.to_lowercase(),
2✔
2659
        _ => {
2660
            return Err(SQLRiteError::NotImplemented(format!(
×
2661
                "qualified function names not supported: {:?}",
2662
                func.name
2663
            )));
2664
        }
2665
    };
2666

2667
    match name.as_str() {
2✔
2668
        "vec_distance_l2" | "vec_distance_cosine" | "vec_distance_dot" => {
2✔
2669
            let (a, b) = extract_two_vector_args(&name, &func.args, scope)?;
3✔
2670
            let dist = match name.as_str() {
2✔
2671
                "vec_distance_l2" => vec_distance_l2(&a, &b),
3✔
2672
                "vec_distance_cosine" => vec_distance_cosine(&a, &b)?,
4✔
2673
                "vec_distance_dot" => vec_distance_dot(&a, &b),
3✔
2674
                _ => unreachable!(),
2675
            };
2676
            // Widen f32 → f64 for the runtime Value. Vectors are stored
2677
            // as f32 (consistent with industry convention for embeddings),
2678
            // but the executor's numeric type is f64 so distances slot
2679
            // into Value::Real cleanly and can be compared / ordered with
2680
            // other reals via the existing arithmetic + comparison paths.
2681
            Ok(Value::Real(dist as f64))
1✔
2682
        }
2683
        // Phase 7e — JSON functions. All four parse the JSON text on
2684
        // demand (we don't cache parsed values), then resolve a path
2685
        // (default `$` = root). The path resolver handles `.key` for
2686
        // object access and `[N]` for array index. SQLite-style.
2687
        "json_extract" => json_fn_extract(&name, &func.args, scope),
3✔
2688
        "json_type" => json_fn_type(&name, &func.args, scope),
4✔
2689
        "json_array_length" => json_fn_array_length(&name, &func.args, scope),
4✔
2690
        "json_object_keys" => json_fn_object_keys(&name, &func.args, scope),
2✔
2691
        // Phase 8b — FTS scalars. Both consult an FTS index attached to
2692
        // the named column; both error if no index exists (the index is
2693
        // a hard prerequisite, mirroring SQLite FTS5's MATCH).
2694
        //
2695
        // SQLR-5 — these only work in a single-table scope because they
2696
        // need the owning `Table` to look up an FTS index by name and
2697
        // they key results by the row's rowid. In a joined query the
2698
        // index lookup would be ambiguous (which table's FTS?) and the
2699
        // scoring rowid is per-table. Reject up front rather than
2700
        // silently wrong-result.
2701
        "fts_match" | "bm25_score" => {
3✔
2702
            let Some((table, rowid)) = scope.single_table_view() else {
2✔
2703
                return Err(SQLRiteError::NotImplemented(format!(
×
2704
                    "{name}() is not yet supported inside a JOIN query — \
2705
                     use it on a single-table SELECT or move the FTS lookup into a subquery"
2706
                )));
2707
            };
2708
            let (entry, query) = resolve_fts_args(&name, &func.args, table, scope)?;
3✔
2709
            Ok(match name.as_str() {
3✔
2710
                "fts_match" => Value::Bool(entry.index.matches(rowid, &query)),
3✔
2711
                "bm25_score" => {
2✔
2712
                    Value::Real(entry.index.score(rowid, &query, &Bm25Params::default()))
1✔
2713
                }
2714
                _ => unreachable!(),
×
2715
            })
2716
        }
2717
        // SQLR-3: catch aggregate names used in scalar position (e.g.
2718
        // `WHERE COUNT(*) > 1`) with a clearer message than "unknown
2719
        // function".
2720
        "count" | "sum" | "avg" | "min" | "max" => Err(SQLRiteError::NotImplemented(format!(
2✔
2721
            "aggregate function '{name}' is not allowed in WHERE / projection-scalar position; \
2722
             use it as a top-level projection item or in HAVING"
2723
        ))),
2724
        other => Err(SQLRiteError::NotImplemented(format!(
1✔
2725
            "unknown function: {other}(...)"
2726
        ))),
2727
    }
2728
}
2729

2730
/// Helper for `fts_match` / `bm25_score`: pull the column reference out
2731
/// of arg 0 (a bare identifier — we need the *name*, not the per-row
2732
/// value), evaluate arg 1 as a Text query string, and look up the FTS
2733
/// index attached to that column. Errors if any step fails.
2734
fn resolve_fts_args<'t>(
1✔
2735
    fn_name: &str,
2736
    args: &FunctionArguments,
2737
    table: &'t Table,
2738
    scope: &dyn RowScope,
2739
) -> Result<(&'t FtsIndexEntry, String)> {
2740
    let arg_list = match args {
1✔
2741
        FunctionArguments::List(l) => &l.args,
1✔
2742
        _ => {
2743
            return Err(SQLRiteError::General(format!(
×
2744
                "{fn_name}() expects exactly two arguments: (column, query_text)"
2745
            )));
2746
        }
2747
    };
2748
    if arg_list.len() != 2 {
1✔
2749
        return Err(SQLRiteError::General(format!(
×
2750
            "{fn_name}() expects exactly 2 arguments, got {}",
2751
            arg_list.len()
×
2752
        )));
2753
    }
2754

2755
    // Arg 0: bare column identifier. Must resolve syntactically to a
2756
    // column name (we can't accept arbitrary expressions because we
2757
    // need the column to look up the index, not the column's value).
2758
    let col_expr = match &arg_list[0] {
2✔
2759
        FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => e,
1✔
2760
        other => {
×
2761
            return Err(SQLRiteError::NotImplemented(format!(
×
2762
                "{fn_name}() argument 0 must be a column name, got {other:?}"
2763
            )));
2764
        }
2765
    };
2766
    let col_name = match col_expr {
1✔
2767
        Expr::Identifier(ident) => ident.value.clone(),
1✔
2768
        Expr::CompoundIdentifier(parts) => parts
×
2769
            .last()
×
2770
            .map(|p| p.value.clone())
×
2771
            .ok_or_else(|| SQLRiteError::Internal("empty compound identifier".to_string()))?,
×
2772
        other => {
×
2773
            return Err(SQLRiteError::General(format!(
×
2774
                "{fn_name}() argument 0 must be a column reference, got {other:?}"
2775
            )));
2776
        }
2777
    };
2778

2779
    // Arg 1: query string. Evaluated through the normal expression
2780
    // pipeline so callers can pass a literal `'rust db'` or an
2781
    // expression that yields TEXT.
2782
    let q_expr = match &arg_list[1] {
2✔
2783
        FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => e,
1✔
2784
        other => {
×
2785
            return Err(SQLRiteError::NotImplemented(format!(
×
2786
                "{fn_name}() argument 1 must be a text expression, got {other:?}"
2787
            )));
2788
        }
2789
    };
2790
    let query = match eval_expr_scope(q_expr, scope)? {
1✔
2791
        Value::Text(s) => s,
1✔
2792
        other => {
×
2793
            return Err(SQLRiteError::General(format!(
×
2794
                "{fn_name}() argument 1 must be TEXT, got {}",
2795
                other.to_display_string()
×
2796
            )));
2797
        }
2798
    };
2799

2800
    let entry = table
4✔
2801
        .fts_indexes
2802
        .iter()
1✔
2803
        .find(|e| e.column_name == col_name)
3✔
2804
        .ok_or_else(|| {
2✔
2805
            SQLRiteError::General(format!(
1✔
2806
                "{fn_name}({col_name}, ...): no FTS index on column '{col_name}' \
2807
                 (run CREATE INDEX <name> ON <table> USING fts({col_name}) first)"
2808
            ))
2809
        })?;
2810
    Ok((entry, query))
1✔
2811
}
2812

2813
// -----------------------------------------------------------------
2814
// Phase 7e — JSON path-extraction functions
2815
// -----------------------------------------------------------------
2816

2817
/// Extracts the JSON-typed text + optional path string out of a
2818
/// function call's args. Used by all four json_* functions.
2819
///
2820
/// Arity rules (matching SQLite JSON1):
2821
///   - 1 arg  → JSON value, path defaults to `$` (root)
2822
///   - 2 args → (JSON value, path text)
2823
///
2824
/// Returns `(json_text, path)` so caller can serde_json::from_str
2825
/// + walk_json_path on it.
2826
fn extract_json_and_path(
1✔
2827
    fn_name: &str,
2828
    args: &FunctionArguments,
2829
    scope: &dyn RowScope,
2830
) -> Result<(String, String)> {
2831
    let arg_list = match args {
1✔
2832
        FunctionArguments::List(l) => &l.args,
1✔
2833
        _ => {
2834
            return Err(SQLRiteError::General(format!(
×
2835
                "{fn_name}() expects 1 or 2 arguments"
2836
            )));
2837
        }
2838
    };
2839
    if !(arg_list.len() == 1 || arg_list.len() == 2) {
2✔
2840
        return Err(SQLRiteError::General(format!(
×
2841
            "{fn_name}() expects 1 or 2 arguments, got {}",
2842
            arg_list.len()
×
2843
        )));
2844
    }
2845
    // Evaluate first arg → must produce text.
2846
    let first_expr = match &arg_list[0] {
2✔
2847
        FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => e,
1✔
2848
        other => {
×
2849
            return Err(SQLRiteError::NotImplemented(format!(
×
2850
                "{fn_name}() argument 0 has unsupported shape: {other:?}"
2851
            )));
2852
        }
2853
    };
2854
    let json_text = match eval_expr_scope(first_expr, scope)? {
1✔
2855
        Value::Text(s) => s,
1✔
2856
        Value::Null => {
2857
            return Err(SQLRiteError::General(format!(
×
2858
                "{fn_name}() called on NULL — JSON column has no value for this row"
2859
            )));
2860
        }
2861
        other => {
×
2862
            return Err(SQLRiteError::General(format!(
×
2863
                "{fn_name}() argument 0 is not JSON-typed: got {}",
2864
                other.to_display_string()
×
2865
            )));
2866
        }
2867
    };
2868

2869
    // Path defaults to root `$` when omitted.
2870
    let path = if arg_list.len() == 2 {
2✔
2871
        let path_expr = match &arg_list[1] {
2✔
2872
            FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => e,
1✔
2873
            other => {
×
2874
                return Err(SQLRiteError::NotImplemented(format!(
×
2875
                    "{fn_name}() argument 1 has unsupported shape: {other:?}"
2876
                )));
2877
            }
2878
        };
2879
        match eval_expr_scope(path_expr, scope)? {
1✔
2880
            Value::Text(s) => s,
1✔
2881
            other => {
×
2882
                return Err(SQLRiteError::General(format!(
×
2883
                    "{fn_name}() path argument must be a string literal, got {}",
2884
                    other.to_display_string()
×
2885
                )));
2886
            }
2887
        }
2888
    } else {
2889
        "$".to_string()
×
2890
    };
2891

2892
    Ok((json_text, path))
1✔
2893
}
2894

2895
/// Walks a `serde_json::Value` along a JSONPath subset:
2896
///   - `$` is the root
2897
///   - `.key` for object access (key may not contain `.` or `[`)
2898
///   - `[N]` for array index (N a non-negative integer)
2899
///   - chains arbitrarily: `$.foo.bar[0].baz`
2900
///
2901
/// Returns `Ok(None)` for "path didn't match anything" (NULL in SQL),
2902
/// `Err` for malformed paths. Matches SQLite JSON1's semantic
2903
/// distinction: missing-key = NULL, malformed-path = error.
2904
fn walk_json_path<'a>(
1✔
2905
    value: &'a serde_json::Value,
2906
    path: &str,
2907
) -> Result<Option<&'a serde_json::Value>> {
2908
    let mut chars = path.chars().peekable();
1✔
2909
    if chars.next() != Some('$') {
1✔
2910
        return Err(SQLRiteError::General(format!(
1✔
2911
            "JSON path must start with '$', got `{path}`"
2912
        )));
2913
    }
2914
    let mut current = value;
1✔
2915
    while let Some(&c) = chars.peek() {
2✔
2916
        match c {
1✔
2917
            '.' => {
2918
                chars.next();
1✔
2919
                let mut key = String::new();
1✔
2920
                while let Some(&c) = chars.peek() {
2✔
2921
                    if c == '.' || c == '[' {
2✔
2922
                        break;
2923
                    }
2924
                    key.push(c);
1✔
2925
                    chars.next();
1✔
2926
                }
2927
                if key.is_empty() {
2✔
2928
                    return Err(SQLRiteError::General(format!(
×
2929
                        "JSON path has empty key after '.' in `{path}`"
2930
                    )));
2931
                }
2932
                match current.get(&key) {
2✔
2933
                    Some(v) => current = v,
1✔
2934
                    None => return Ok(None),
1✔
2935
                }
2936
            }
2937
            '[' => {
2938
                chars.next();
1✔
2939
                let mut idx_str = String::new();
1✔
2940
                while let Some(&c) = chars.peek() {
2✔
2941
                    if c == ']' {
1✔
2942
                        break;
2943
                    }
2944
                    idx_str.push(c);
1✔
2945
                    chars.next();
1✔
2946
                }
2947
                if chars.next() != Some(']') {
2✔
2948
                    return Err(SQLRiteError::General(format!(
×
2949
                        "JSON path has unclosed `[` in `{path}`"
2950
                    )));
2951
                }
2952
                let idx: usize = idx_str.trim().parse().map_err(|_| {
2✔
2953
                    SQLRiteError::General(format!(
×
2954
                        "JSON path has non-integer index `[{idx_str}]` in `{path}`"
2955
                    ))
2956
                })?;
2957
                match current.get(idx) {
1✔
2958
                    Some(v) => current = v,
1✔
2959
                    None => return Ok(None),
×
2960
                }
2961
            }
2962
            other => {
×
2963
                return Err(SQLRiteError::General(format!(
×
2964
                    "JSON path has unexpected character `{other}` in `{path}` \
2965
                     (expected `.`, `[`, or end-of-path)"
2966
                )));
2967
            }
2968
        }
2969
    }
2970
    Ok(Some(current))
1✔
2971
}
2972

2973
/// Converts a serde_json scalar to a SQLRite Value. For composite
2974
/// types (object, array) returns the JSON-encoded text — callers
2975
/// pattern-match on shape from the calling json_* function.
2976
fn json_value_to_sql(v: &serde_json::Value) -> Value {
1✔
2977
    match v {
1✔
2978
        serde_json::Value::Null => Value::Null,
×
2979
        serde_json::Value::Bool(b) => Value::Bool(*b),
×
2980
        serde_json::Value::Number(n) => {
1✔
2981
            // Match SQLite: integer if it fits an i64, else f64.
2982
            if let Some(i) = n.as_i64() {
3✔
2983
                Value::Integer(i)
1✔
2984
            } else if let Some(f) = n.as_f64() {
×
2985
                Value::Real(f)
×
2986
            } else {
2987
                Value::Null
×
2988
            }
2989
        }
2990
        serde_json::Value::String(s) => Value::Text(s.clone()),
1✔
2991
        // Objects + arrays come out as JSON-encoded text. Same as
2992
        // SQLite's json_extract: composite results round-trip through
2993
        // text rather than being modeled as a richer Value type.
2994
        composite => Value::Text(composite.to_string()),
×
2995
    }
2996
}
2997

2998
fn json_fn_extract(name: &str, args: &FunctionArguments, scope: &dyn RowScope) -> Result<Value> {
1✔
2999
    let (json_text, path) = extract_json_and_path(name, args, scope)?;
1✔
3000
    let parsed: serde_json::Value = serde_json::from_str(&json_text).map_err(|e| {
2✔
3001
        SQLRiteError::General(format!("{name}() got invalid JSON `{json_text}`: {e}"))
×
3002
    })?;
3003
    match walk_json_path(&parsed, &path)? {
2✔
3004
        Some(v) => Ok(json_value_to_sql(v)),
2✔
3005
        None => Ok(Value::Null),
1✔
3006
    }
3007
}
3008

3009
fn json_fn_type(name: &str, args: &FunctionArguments, scope: &dyn RowScope) -> Result<Value> {
1✔
3010
    let (json_text, path) = extract_json_and_path(name, args, scope)?;
1✔
3011
    let parsed: serde_json::Value = serde_json::from_str(&json_text).map_err(|e| {
2✔
3012
        SQLRiteError::General(format!("{name}() got invalid JSON `{json_text}`: {e}"))
×
3013
    })?;
3014
    let resolved = match walk_json_path(&parsed, &path)? {
2✔
3015
        Some(v) => v,
1✔
3016
        None => return Ok(Value::Null),
×
3017
    };
3018
    let ty = match resolved {
2✔
3019
        serde_json::Value::Null => "null",
1✔
3020
        serde_json::Value::Bool(true) => "true",
1✔
3021
        serde_json::Value::Bool(false) => "false",
×
3022
        serde_json::Value::Number(n) => {
1✔
3023
            if n.is_i64() || n.is_u64() {
4✔
3024
                "integer"
1✔
3025
            } else {
3026
                "real"
1✔
3027
            }
3028
        }
3029
        serde_json::Value::String(_) => "text",
1✔
3030
        serde_json::Value::Array(_) => "array",
1✔
3031
        serde_json::Value::Object(_) => "object",
1✔
3032
    };
3033
    Ok(Value::Text(ty.to_string()))
2✔
3034
}
3035

3036
fn json_fn_array_length(
1✔
3037
    name: &str,
3038
    args: &FunctionArguments,
3039
    scope: &dyn RowScope,
3040
) -> Result<Value> {
3041
    let (json_text, path) = extract_json_and_path(name, args, scope)?;
1✔
3042
    let parsed: serde_json::Value = serde_json::from_str(&json_text).map_err(|e| {
2✔
3043
        SQLRiteError::General(format!("{name}() got invalid JSON `{json_text}`: {e}"))
×
3044
    })?;
3045
    let resolved = match walk_json_path(&parsed, &path)? {
2✔
3046
        Some(v) => v,
1✔
3047
        None => return Ok(Value::Null),
×
3048
    };
3049
    match resolved.as_array() {
2✔
3050
        Some(arr) => Ok(Value::Integer(arr.len() as i64)),
2✔
3051
        None => Err(SQLRiteError::General(format!(
1✔
3052
            "{name}() resolved to a non-array value at path `{path}`"
3053
        ))),
3054
    }
3055
}
3056

3057
fn json_fn_object_keys(
×
3058
    name: &str,
3059
    args: &FunctionArguments,
3060
    scope: &dyn RowScope,
3061
) -> Result<Value> {
3062
    let (json_text, path) = extract_json_and_path(name, args, scope)?;
×
3063
    let parsed: serde_json::Value = serde_json::from_str(&json_text).map_err(|e| {
×
3064
        SQLRiteError::General(format!("{name}() got invalid JSON `{json_text}`: {e}"))
×
3065
    })?;
3066
    let resolved = match walk_json_path(&parsed, &path)? {
×
3067
        Some(v) => v,
×
3068
        None => return Ok(Value::Null),
×
3069
    };
3070
    let obj = resolved.as_object().ok_or_else(|| {
×
3071
        SQLRiteError::General(format!(
×
3072
            "{name}() resolved to a non-object value at path `{path}`"
3073
        ))
3074
    })?;
3075
    // SQLite's json_object_keys is a table-valued function (one row
3076
    // per key). Without set-returning function support we can't
3077
    // reproduce that shape; instead return the keys as a JSON array
3078
    // text. Caller can iterate via json_array_length + json_extract,
3079
    // or just treat it as a serialized list. Document this divergence
3080
    // in supported-sql.md.
3081
    let keys: Vec<serde_json::Value> = obj
3082
        .keys()
3083
        .map(|k| serde_json::Value::String(k.clone()))
×
3084
        .collect();
3085
    Ok(Value::Text(serde_json::Value::Array(keys).to_string()))
×
3086
}
3087

3088
/// Extracts exactly two `Vec<f32>` arguments from a function call,
3089
/// validating arity and that both sides are Vector-typed with matching
3090
/// dimensions. Used by all three vec_distance_* functions.
3091
fn extract_two_vector_args(
1✔
3092
    fn_name: &str,
3093
    args: &FunctionArguments,
3094
    scope: &dyn RowScope,
3095
) -> Result<(Vec<f32>, Vec<f32>)> {
3096
    let arg_list = match args {
1✔
3097
        FunctionArguments::List(l) => &l.args,
1✔
3098
        _ => {
3099
            return Err(SQLRiteError::General(format!(
×
3100
                "{fn_name}() expects exactly two vector arguments"
3101
            )));
3102
        }
3103
    };
3104
    if arg_list.len() != 2 {
1✔
3105
        return Err(SQLRiteError::General(format!(
×
3106
            "{fn_name}() expects exactly 2 arguments, got {}",
3107
            arg_list.len()
×
3108
        )));
3109
    }
3110
    let mut out: Vec<Vec<f32>> = Vec::with_capacity(2);
1✔
3111
    for (i, arg) in arg_list.iter().enumerate() {
3✔
3112
        let expr = match arg {
2✔
3113
            FunctionArg::Unnamed(FunctionArgExpr::Expr(e)) => e,
1✔
3114
            other => {
×
3115
                return Err(SQLRiteError::NotImplemented(format!(
×
3116
                    "{fn_name}() argument {i} has unsupported shape: {other:?}"
3117
                )));
3118
            }
3119
        };
3120
        let val = eval_expr_scope(expr, scope)?;
1✔
3121
        match val {
1✔
3122
            Value::Vector(v) => out.push(v),
1✔
3123
            other => {
×
3124
                return Err(SQLRiteError::General(format!(
×
3125
                    "{fn_name}() argument {i} is not a vector: got {}",
3126
                    other.to_display_string()
×
3127
                )));
3128
            }
3129
        }
3130
    }
3131
    let b = out.pop().unwrap();
1✔
3132
    let a = out.pop().unwrap();
2✔
3133
    if a.len() != b.len() {
2✔
3134
        return Err(SQLRiteError::General(format!(
1✔
3135
            "{fn_name}(): vector dimensions don't match (lhs={}, rhs={})",
3136
            a.len(),
2✔
3137
            b.len()
1✔
3138
        )));
3139
    }
3140
    Ok((a, b))
1✔
3141
}
3142

3143
/// Euclidean (L2) distance: √Σ(aᵢ − bᵢ)².
3144
/// Smaller-is-closer; identical vectors return 0.0.
3145
pub(crate) fn vec_distance_l2(a: &[f32], b: &[f32]) -> f32 {
1✔
3146
    debug_assert_eq!(a.len(), b.len());
1✔
3147
    let mut sum = 0.0f32;
1✔
3148
    for i in 0..a.len() {
2✔
3149
        let d = a[i] - b[i];
2✔
3150
        sum += d * d;
1✔
3151
    }
3152
    sum.sqrt()
1✔
3153
}
3154

3155
/// Cosine distance: 1 − (a·b) / (‖a‖·‖b‖).
3156
/// Smaller-is-closer; identical (non-zero) vectors return 0.0,
3157
/// orthogonal vectors return 1.0, opposite-direction vectors return 2.0.
3158
///
3159
/// Errors if either vector has zero magnitude — cosine similarity is
3160
/// undefined for the zero vector and silently returning NaN would
3161
/// poison `ORDER BY` ranking. Callers who want the silent-NaN
3162
/// behavior can compute `vec_distance_dot(a, b) / (norm(a) * norm(b))`
3163
/// themselves.
3164
pub(crate) fn vec_distance_cosine(a: &[f32], b: &[f32]) -> Result<f32> {
1✔
3165
    debug_assert_eq!(a.len(), b.len());
1✔
3166
    let mut dot = 0.0f32;
1✔
3167
    let mut norm_a_sq = 0.0f32;
1✔
3168
    let mut norm_b_sq = 0.0f32;
1✔
3169
    for i in 0..a.len() {
2✔
3170
        dot += a[i] * b[i];
2✔
3171
        norm_a_sq += a[i] * a[i];
2✔
3172
        norm_b_sq += b[i] * b[i];
2✔
3173
    }
3174
    let denom = (norm_a_sq * norm_b_sq).sqrt();
1✔
3175
    if denom == 0.0 {
1✔
3176
        return Err(SQLRiteError::General(
1✔
3177
            "vec_distance_cosine() is undefined for zero-magnitude vectors".to_string(),
1✔
3178
        ));
3179
    }
3180
    Ok(1.0 - dot / denom)
1✔
3181
}
3182

3183
/// Negated dot product: −(a·b).
3184
/// pgvector convention — negated so smaller-is-closer like L2 / cosine.
3185
/// For unit-norm vectors `vec_distance_dot(a, b) == vec_distance_cosine(a, b) - 1`.
3186
pub(crate) fn vec_distance_dot(a: &[f32], b: &[f32]) -> f32 {
1✔
3187
    debug_assert_eq!(a.len(), b.len());
1✔
3188
    let mut dot = 0.0f32;
1✔
3189
    for i in 0..a.len() {
2✔
3190
        dot += a[i] * b[i];
2✔
3191
    }
3192
    -dot
1✔
3193
}
3194

3195
/// Evaluates an integer/real arithmetic op. NULL on either side propagates.
3196
/// Mixed Integer/Real promotes to Real. Divide/Modulo by zero → error.
3197
fn eval_arith(op: &BinaryOperator, l: &Value, r: &Value) -> Result<Value> {
1✔
3198
    if matches!(l, Value::Null) || matches!(r, Value::Null) {
1✔
3199
        return Ok(Value::Null);
×
3200
    }
3201
    match (l, r) {
1✔
3202
        (Value::Integer(a), Value::Integer(b)) => match op {
1✔
3203
            BinaryOperator::Plus => Ok(Value::Integer(a.wrapping_add(*b))),
1✔
3204
            BinaryOperator::Minus => Ok(Value::Integer(a.wrapping_sub(*b))),
×
3205
            BinaryOperator::Multiply => Ok(Value::Integer(a.wrapping_mul(*b))),
1✔
3206
            BinaryOperator::Divide => {
3207
                if *b == 0 {
×
3208
                    Err(SQLRiteError::General("division by zero".to_string()))
×
3209
                } else {
3210
                    Ok(Value::Integer(a / b))
×
3211
                }
3212
            }
3213
            BinaryOperator::Modulo => {
3214
                if *b == 0 {
×
3215
                    Err(SQLRiteError::General("modulo by zero".to_string()))
×
3216
                } else {
3217
                    Ok(Value::Integer(a % b))
×
3218
                }
3219
            }
3220
            _ => unreachable!(),
3221
        },
3222
        // Anything involving a Real promotes both sides to f64.
3223
        (a, b) => {
×
3224
            let af = as_number(a)?;
×
3225
            let bf = as_number(b)?;
×
3226
            match op {
×
3227
                BinaryOperator::Plus => Ok(Value::Real(af + bf)),
×
3228
                BinaryOperator::Minus => Ok(Value::Real(af - bf)),
×
3229
                BinaryOperator::Multiply => Ok(Value::Real(af * bf)),
×
3230
                BinaryOperator::Divide => {
3231
                    if bf == 0.0 {
×
3232
                        Err(SQLRiteError::General("division by zero".to_string()))
×
3233
                    } else {
3234
                        Ok(Value::Real(af / bf))
×
3235
                    }
3236
                }
3237
                BinaryOperator::Modulo => {
3238
                    if bf == 0.0 {
×
3239
                        Err(SQLRiteError::General("modulo by zero".to_string()))
×
3240
                    } else {
3241
                        Ok(Value::Real(af % bf))
×
3242
                    }
3243
                }
3244
                _ => unreachable!(),
3245
            }
3246
        }
3247
    }
3248
}
3249

3250
fn as_number(v: &Value) -> Result<f64> {
×
3251
    match v {
×
3252
        Value::Integer(i) => Ok(*i as f64),
×
3253
        Value::Real(f) => Ok(*f),
×
3254
        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
×
3255
        other => Err(SQLRiteError::General(format!(
×
3256
            "arithmetic on non-numeric value '{}'",
3257
            other.to_display_string()
×
3258
        ))),
3259
    }
3260
}
3261

3262
fn as_bool(v: &Value) -> Result<bool> {
1✔
3263
    match v {
1✔
3264
        Value::Bool(b) => Ok(*b),
1✔
3265
        Value::Null => Ok(false),
3266
        Value::Integer(i) => Ok(*i != 0),
×
3267
        other => Err(SQLRiteError::Internal(format!(
×
3268
            "expected boolean, got {}",
3269
            other.to_display_string()
×
3270
        ))),
3271
    }
3272
}
3273

3274
// -----------------------------------------------------------------
3275
// SQLR-3 — LIKE / IN evaluators
3276
// -----------------------------------------------------------------
3277

3278
#[allow(clippy::too_many_arguments)]
3279
fn eval_like(
1✔
3280
    scope: &dyn RowScope,
3281
    negated: bool,
3282
    any: bool,
3283
    lhs: &Expr,
3284
    pattern: &Expr,
3285
    escape_char: Option<&AstValue>,
3286
    case_insensitive: bool,
3287
) -> Result<Value> {
3288
    if any {
1✔
3289
        return Err(SQLRiteError::NotImplemented(
×
3290
            "LIKE ANY (...) is not supported".to_string(),
×
3291
        ));
3292
    }
3293
    if escape_char.is_some() {
1✔
3294
        return Err(SQLRiteError::NotImplemented(
×
3295
            "LIKE ... ESCAPE '<char>' is not supported (default `\\` escape only)".to_string(),
×
3296
        ));
3297
    }
3298

3299
    let l = eval_expr_scope(lhs, scope)?;
1✔
3300
    let p = eval_expr_scope(pattern, scope)?;
2✔
3301
    if matches!(l, Value::Null) || matches!(p, Value::Null) {
1✔
3302
        return Ok(Value::Null);
×
3303
    }
3304
    let text = match l {
1✔
3305
        Value::Text(s) => s,
1✔
3306
        other => other.to_display_string(),
×
3307
    };
3308
    let pat = match p {
1✔
3309
        Value::Text(s) => s,
1✔
3310
        other => other.to_display_string(),
×
3311
    };
3312
    let m = like_match(&text, &pat, case_insensitive);
2✔
3313
    Ok(Value::Bool(if negated { !m } else { m }))
1✔
3314
}
3315

3316
fn eval_in_list(scope: &dyn RowScope, lhs: &Expr, list: &[Expr], negated: bool) -> Result<Value> {
2✔
3317
    let l = eval_expr_scope(lhs, scope)?;
1✔
3318
    if matches!(l, Value::Null) {
1✔
3319
        return Ok(Value::Null);
×
3320
    }
3321
    let mut saw_null = false;
1✔
3322
    for item in list {
2✔
3323
        let r = eval_expr_scope(item, scope)?;
2✔
3324
        if matches!(r, Value::Null) {
1✔
3325
            saw_null = true;
1✔
3326
            continue;
3327
        }
3328
        if compare_values(Some(&l), Some(&r)) == Ordering::Equal {
2✔
3329
            return Ok(Value::Bool(!negated));
1✔
3330
        }
3331
    }
3332
    if saw_null {
2✔
3333
        // SQLite three-valued IN: unmatched + a NULL on the RHS → NULL.
3334
        // WHERE coerces NULL → false, so the row is excluded either way.
3335
        Ok(Value::Null)
1✔
3336
    } else {
3337
        Ok(Value::Bool(negated))
1✔
3338
    }
3339
}
3340

3341
// -----------------------------------------------------------------
3342
// SQLR-3 — Aggregation phase, DISTINCT, post-projection sort
3343
// -----------------------------------------------------------------
3344

3345
/// SQLR-52 — HAVING lowering, shared by the single-table and joined
3346
/// aggregation paths. The expression may reference aggregates and
3347
/// GROUP BY keys that aren't in the SELECT output (SQLite allows
3348
/// both: `SELECT dept FROM t GROUP BY dept HAVING COUNT(*) > 1`).
3349
/// We append those as *hidden* trailing projection slots so the
3350
/// `aggregate_rows` accumulator computes them alongside the visible
3351
/// ones; the pipeline strips them after filtering. Aggregate calls in
3352
/// the HAVING tree are lowered to identifiers naming their output slot
3353
/// (`COUNT(*)` → identifier "COUNT(*)") so the shared expression
3354
/// evaluator can resolve them through a `GroupRowScope` like any other
3355
/// column. Returns the widened projection list plus the lowered
3356
/// HAVING expression (`None` when the query has no HAVING).
3357
fn lower_having_into_hidden_slots(
1✔
3358
    query: &SelectQuery,
3359
    proj_items: &[ProjectionItem],
3360
) -> Result<(Vec<ProjectionItem>, Option<Expr>)> {
3361
    let mut all_items = proj_items.to_vec();
1✔
3362
    let having_expr = match &query.having {
1✔
3363
        Some(h) => {
1✔
3364
            for g in &query.group_by {
2✔
3365
                if !all_items
3✔
3366
                    .iter()
1✔
3367
                    .any(|i| i.output_name().eq_ignore_ascii_case(&g.name))
3✔
3368
                {
3369
                    all_items.push(ProjectionItem {
1✔
3370
                        kind: ProjectionKind::Column {
1✔
3371
                            qualifier: g.qualifier.clone(),
1✔
3372
                            name: g.name.clone(),
1✔
3373
                        },
3374
                        alias: None,
1✔
3375
                    });
3376
                }
3377
            }
3378
            Some(lower_having_expr(h, &mut all_items)?)
1✔
3379
        }
3380
        None => None,
1✔
3381
    };
3382
    Ok((all_items, having_expr))
1✔
3383
}
3384

3385
/// The aggregation tail shared by the single-table and joined SELECT
3386
/// paths: accumulate groups over the row scopes, apply HAVING, strip
3387
/// hidden HAVING-only slots, then DISTINCT / ORDER BY / LIMIT on the
3388
/// output rows. Callers validate column references against their own
3389
/// scope (table schema vs. joined-table list) before invoking.
3390
fn run_aggregation_pipeline<S: RowScope>(
2✔
3391
    scopes: impl IntoIterator<Item = S>,
3392
    query: &SelectQuery,
3393
    proj_items: &[ProjectionItem],
3394
    all_items: &[ProjectionItem],
3395
    having_expr: &Option<Expr>,
3396
) -> Result<SelectResult> {
3397
    let columns: Vec<String> = proj_items.iter().map(|i| i.output_name()).collect();
8✔
3398
    let mut rows = aggregate_rows(scopes, &query.group_by, all_items)?;
5✔
3399

3400
    if let Some(h) = having_expr {
2✔
3401
        let all_columns: Vec<String> = all_items.iter().map(|i| i.output_name()).collect();
8✔
3402
        rows = filter_groups_by_having(rows, h, &all_columns)?;
4✔
3403
    }
3404
    // Drop the hidden HAVING-only slots back to the user-visible width.
3405
    if all_items.len() > proj_items.len() {
2✔
3406
        for row in &mut rows {
1✔
3407
            row.truncate(proj_items.len());
2✔
3408
        }
3409
    }
3410

3411
    if query.distinct {
2✔
3412
        rows = dedupe_rows(rows);
×
3413
    }
3414

3415
    if let Some(order) = &query.order_by {
4✔
3416
        sort_output_rows(&mut rows, &columns, proj_items, order)?;
4✔
3417
    }
3418
    if let Some(k) = query.limit {
3✔
3419
        rows.truncate(k);
2✔
3420
    }
3421

3422
    Ok(SelectResult { columns, rows })
2✔
3423
}
3424

3425
/// Walk the row scopes, partition into groups (one synthetic group
3426
/// when `group_by` is empty), update one `AggState` per aggregate
3427
/// projection slot per group, then materialize one output row per
3428
/// group in projection order. Group-key columns surface their original
3429
/// `Value` (captured the first time the group was seen); aggregate
3430
/// slots surface `AggState::finalize()`.
3431
///
3432
/// SQLR-6 — generic over [`RowScope`] so the same accumulator serves
3433
/// the single-table path (a [`SingleTableScope`] per matching rowid)
3434
/// and the joined path (a [`JoinedScope`] per joined row, where
3435
/// NULL-padded outer-join sides surface as `Value::Null` — grouped
3436
/// together like any other NULL, and skipped by `COUNT(col)` per the
3437
/// usual NULL-skipping aggregate semantics).
3438
fn aggregate_rows<S: RowScope>(
2✔
3439
    scopes: impl IntoIterator<Item = S>,
3440
    group_by: &[GroupByKey],
3441
    proj_items: &[ProjectionItem],
3442
) -> Result<Vec<Vec<Value>>> {
3443
    // Build the per-projection-slot accumulator template once. Each
3444
    // group clones this template on first sight. Non-aggregate slots
3445
    // hold a "captured group-key value" (`None` until set).
3446
    let template: Vec<Option<AggState>> = proj_items
2✔
3447
        .iter()
3448
        .map(|i| match &i.kind {
6✔
3449
            ProjectionKind::Aggregate(call) => Some(AggState::new(call)),
2✔
3450
            ProjectionKind::Column { .. } => None,
2✔
3451
        })
3452
        .collect();
3453

3454
    // Linear-scan group lookup. For typical ad-hoc queries (cardinality
3455
    // ≪ 10k), this is fine; if grouping cardinality grows, swap to a
3456
    // HashMap<Vec<DistinctKey>, usize> keyed by the same DistinctKey
3457
    // wrapper. Order-preserving for readable output (groups appear in
3458
    // first-occurrence order, matching SQLite's typical behavior).
3459
    let mut keys: Vec<Vec<DistinctKey>> = Vec::new();
2✔
3460
    let mut group_states: Vec<Vec<Option<AggState>>> = Vec::new();
2✔
3461
    let mut group_key_values: Vec<Vec<Value>> = Vec::new();
2✔
3462

3463
    for scope in scopes {
6✔
3464
        let mut key_values: Vec<Value> = Vec::with_capacity(group_by.len());
4✔
3465
        let mut key: Vec<DistinctKey> = Vec::with_capacity(group_by.len());
4✔
3466
        for g in group_by {
6✔
3467
            let v = scope.lookup(g.qualifier.as_deref(), &g.name)?;
4✔
3468
            key.push(DistinctKey::from_value(&v));
4✔
3469
            key_values.push(v);
2✔
3470
        }
3471
        let idx = match keys.iter().position(|k| k == &key) {
6✔
3472
            Some(i) => i,
2✔
3473
            None => {
3474
                keys.push(key);
2✔
3475
                group_states.push(template.clone());
2✔
3476
                group_key_values.push(key_values);
2✔
3477
                keys.len() - 1
2✔
3478
            }
3479
        };
3480

3481
        for (slot, item) in proj_items.iter().enumerate() {
2✔
3482
            if let ProjectionKind::Aggregate(call) = &item.kind {
4✔
3483
                let v = match &call.arg {
2✔
3484
                    AggregateArg::Star => Value::Null,
2✔
3485
                    AggregateArg::Column { qualifier, name } => {
2✔
3486
                        scope.lookup(qualifier.as_deref(), name)?
4✔
3487
                    }
3488
                };
3489
                if let Some(state) = group_states[idx][slot].as_mut() {
4✔
3490
                    state.update(&v)?;
4✔
3491
                }
3492
            }
3493
        }
3494
    }
3495

3496
    // No groups but no aggregate-only "implicit one row" semantic to
3497
    // emit: e.g. `SELECT dept FROM t GROUP BY dept` over an empty
3498
    // matching set should produce zero rows. `SELECT COUNT(*) FROM t`
3499
    // (no GROUP BY) DOES produce one row even on empty input — the
3500
    // single-synthetic-group path below handles it.
3501
    if keys.is_empty() && group_by.is_empty() {
3✔
3502
        // Synthetic single empty group so we still emit one row with
3503
        // initial accumulator finals (e.g. COUNT(*) → 0).
3504
        keys.push(Vec::new());
1✔
3505
        group_states.push(template.clone());
1✔
3506
        group_key_values.push(Vec::new());
1✔
3507
    }
3508

3509
    // Project: one row per group, in projection order.
3510
    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(keys.len());
4✔
3511
    for (group_idx, _) in keys.iter().enumerate() {
6✔
3512
        let mut row: Vec<Value> = Vec::with_capacity(proj_items.len());
4✔
3513
        for (slot, item) in proj_items.iter().enumerate() {
4✔
3514
            match &item.kind {
2✔
3515
                ProjectionKind::Column { qualifier, name: c } => {
2✔
3516
                    // Parser / executor validation ties bare-column
3517
                    // projections to GROUP BY entries, but `SELECT *`
3518
                    // expansions reach here unvalidated — surface a
3519
                    // clean error rather than panicking.
3520
                    let pos = group_by
5✔
3521
                        .iter()
2✔
3522
                        .position(|g| g.matches_column(qualifier.as_deref(), c))
6✔
3523
                        .ok_or_else(|| {
3✔
3524
                            SQLRiteError::Internal(format!(
1✔
3525
                                "column '{c}' must appear in GROUP BY or be used in an \
3526
                                 aggregate function"
3527
                            ))
3528
                        })?;
3529
                    row.push(group_key_values[group_idx][pos].clone());
2✔
3530
                }
3531
                ProjectionKind::Aggregate(_) => {
3532
                    let state = group_states[group_idx][slot]
6✔
3533
                        .as_ref()
3534
                        .expect("aggregate slot has state");
3535
                    row.push(state.finalize());
2✔
3536
                }
3537
            }
3538
        }
3539
        rows.push(row);
2✔
3540
    }
3541
    Ok(rows)
2✔
3542
}
3543

3544
// -----------------------------------------------------------------
3545
// SQLR-52 — HAVING (post-aggregation filter)
3546
// -----------------------------------------------------------------
3547

3548
/// Scope for evaluating a HAVING expression against one group's output
3549
/// row. Column references resolve against the output column names —
3550
/// GROUP BY keys, aggregate aliases, aggregate display forms like
3551
/// `COUNT(*)` (the lowered shape `lower_having_expr` produces), and
3552
/// the hidden HAVING-only slots appended by the executor.
3553
struct GroupRowScope<'a> {
3554
    columns: &'a [String],
3555
    values: &'a [Value],
3556
}
3557

3558
impl RowScope for GroupRowScope<'_> {
3559
    fn lookup(&self, qualifier: Option<&str>, col: &str) -> Result<Value> {
1✔
3560
        // Output columns carry no table qualifier — `t.dept` in HAVING
3561
        // resolves by its column part, same as the aggregating ORDER BY.
3562
        let _ = qualifier;
×
3563
        self.columns
1✔
3564
            .iter()
3565
            .position(|c| c.eq_ignore_ascii_case(col))
3✔
3566
            .map(|i| self.values[i].clone())
3✔
3567
            .ok_or_else(|| {
2✔
3568
                SQLRiteError::Internal(format!(
1✔
3569
                    "HAVING references '{col}', which is neither a GROUP BY column nor an \
×
3570
                     aggregate in scope"
×
3571
                ))
3572
            })
3573
    }
3574

3575
    fn single_table_view(&self) -> Option<(&Table, i64)> {
×
3576
        None
×
3577
    }
3578
}
3579

3580
/// Rewrite a HAVING expression for group-row evaluation: every
3581
/// aggregate call in the tree becomes an identifier naming its output
3582
/// slot (`SUM(salary)` → identifier `"SUM(salary)"`), registering a
3583
/// hidden projection slot for any aggregate not already in the SELECT
3584
/// list so `aggregate_rows` computes it. Non-aggregate functions and
3585
/// leaf expressions pass through untouched — the shared evaluator
3586
/// handles (or rejects) them at filter time.
3587
fn lower_having_expr(expr: &Expr, items: &mut Vec<ProjectionItem>) -> Result<Expr> {
1✔
3588
    Ok(match expr {
2✔
3589
        Expr::Function(func) => {
1✔
3590
            let is_aggregate = matches!(
2✔
3591
                func.name.0.as_slice(),
2✔
3592
                [ObjectNamePart::Identifier(ident)] if AggregateFn::from_name(&ident.value).is_some()
2✔
3593
            );
3594
            if !is_aggregate {
1✔
3595
                return Ok(expr.clone());
×
3596
            }
3597
            let call = parse_aggregate_call(func)?;
2✔
3598
            let display = call.display_name();
2✔
3599
            // Resolvable already? Identifier lookup goes by output
3600
            // column name, so an unaliased projection of the same
3601
            // aggregate (output name == display form) suffices. An
3602
            // *aliased* one doesn't — its output name is the alias —
3603
            // so the call still gets a hidden slot of its own.
3604
            let already_known = items
3✔
3605
                .iter()
1✔
3606
                .any(|i| i.output_name().eq_ignore_ascii_case(&display));
3✔
3607
            if !already_known {
1✔
3608
                items.push(ProjectionItem {
2✔
3609
                    kind: ProjectionKind::Aggregate(call),
1✔
3610
                    alias: None,
1✔
3611
                });
3612
            }
3613
            Expr::Identifier(Ident::new(display))
2✔
3614
        }
3615
        Expr::Nested(inner) => Expr::Nested(Box::new(lower_having_expr(inner, items)?)),
×
3616
        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
×
3617
            op: *op,
×
3618
            expr: Box::new(lower_having_expr(inner, items)?),
×
3619
        },
3620
        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
2✔
3621
            left: Box::new(lower_having_expr(left, items)?),
2✔
3622
            op: op.clone(),
1✔
3623
            right: Box::new(lower_having_expr(right, items)?),
2✔
3624
        },
3625
        Expr::IsNull(inner) => Expr::IsNull(Box::new(lower_having_expr(inner, items)?)),
×
3626
        Expr::IsNotNull(inner) => Expr::IsNotNull(Box::new(lower_having_expr(inner, items)?)),
×
3627
        Expr::InList {
3628
            expr: lhs,
×
3629
            list,
×
3630
            negated,
×
3631
        } => Expr::InList {
×
3632
            expr: Box::new(lower_having_expr(lhs, items)?),
×
3633
            list: list
×
3634
                .iter()
×
3635
                .map(|e| lower_having_expr(e, items))
×
3636
                .collect::<Result<Vec<_>>>()?,
×
3637
            negated: *negated,
×
3638
        },
3639
        Expr::Like {
3640
            negated,
×
3641
            any,
×
3642
            expr: lhs,
×
3643
            pattern,
×
3644
            escape_char,
×
3645
        } => Expr::Like {
×
3646
            negated: *negated,
×
3647
            any: *any,
×
3648
            expr: Box::new(lower_having_expr(lhs, items)?),
×
3649
            pattern: Box::new(lower_having_expr(pattern, items)?),
×
3650
            escape_char: escape_char.clone(),
×
3651
        },
3652
        Expr::ILike {
3653
            negated,
×
3654
            any,
×
3655
            expr: lhs,
×
3656
            pattern,
×
3657
            escape_char,
×
3658
        } => Expr::ILike {
×
3659
            negated: *negated,
×
3660
            any: *any,
×
3661
            expr: Box::new(lower_having_expr(lhs, items)?),
×
3662
            pattern: Box::new(lower_having_expr(pattern, items)?),
×
3663
            escape_char: escape_char.clone(),
×
3664
        },
3665
        // Leaves (identifiers, literals) and unsupported shapes pass
3666
        // through; the evaluator produces its own error for the latter.
3667
        other => other.clone(),
1✔
3668
    })
3669
}
3670

3671
/// Keep only the groups whose HAVING expression evaluates truthy.
3672
/// NULL collapses to false — same three-valued-logic coercion the
3673
/// WHERE path applies.
3674
fn filter_groups_by_having(
1✔
3675
    rows: Vec<Vec<Value>>,
3676
    having: &Expr,
3677
    columns: &[String],
3678
) -> Result<Vec<Vec<Value>>> {
3679
    let mut out = Vec::with_capacity(rows.len());
2✔
3680
    for row in rows {
4✔
3681
        let scope = GroupRowScope {
3682
            columns,
3683
            values: &row,
1✔
3684
        };
3685
        let keep = match eval_expr_scope(having, &scope)? {
2✔
3686
            Value::Bool(b) => b,
1✔
3687
            Value::Null => false,
×
3688
            Value::Integer(i) => i != 0,
×
3689
            other => {
×
3690
                return Err(SQLRiteError::Internal(format!(
×
3691
                    "HAVING clause must evaluate to boolean, got {}",
3692
                    other.to_display_string()
×
3693
                )));
3694
            }
3695
        };
3696
        if keep {
1✔
3697
            out.push(row);
1✔
3698
        }
3699
    }
3700
    Ok(out)
1✔
3701
}
3702

3703
/// SELECT DISTINCT post-pass. Walks the rows once with a `HashSet` of
3704
/// row-keys, preserving first-occurrence order. NULL == NULL for
3705
/// dedupe purposes, which matches the SQL DISTINCT semantic.
3706
fn dedupe_rows(rows: Vec<Vec<Value>>) -> Vec<Vec<Value>> {
1✔
3707
    use std::collections::HashSet;
3708
    let mut seen: HashSet<Vec<DistinctKey>> = HashSet::new();
1✔
3709
    let mut out = Vec::with_capacity(rows.len());
2✔
3710
    for row in rows {
4✔
3711
        let key: Vec<DistinctKey> = row.iter().map(DistinctKey::from_value).collect();
2✔
3712
        if seen.insert(key) {
1✔
3713
            out.push(row);
1✔
3714
        }
3715
    }
3716
    out
1✔
3717
}
3718

3719
/// Sort output rows for the aggregating path. ORDER BY can reference
3720
/// either an output column name (alias or bare GROUP BY column) or an
3721
/// aggregate function call by display form (e.g. `COUNT(*)`).
3722
fn sort_output_rows(
1✔
3723
    rows: &mut [Vec<Value>],
3724
    columns: &[String],
3725
    proj_items: &[ProjectionItem],
3726
    order: &OrderByClause,
3727
) -> Result<()> {
3728
    let target_idx = resolve_order_by_index(&order.expr, columns, proj_items)?;
1✔
3729
    rows.sort_by(|a, b| {
2✔
3730
        let va = &a[target_idx];
1✔
3731
        let vb = &b[target_idx];
1✔
3732
        let ord = compare_values(Some(va), Some(vb));
1✔
3733
        if order.ascending { ord } else { ord.reverse() }
1✔
3734
    });
3735
    Ok(())
1✔
3736
}
3737

3738
/// Map an ORDER BY expression to the index of the output column that
3739
/// should drive the sort.
3740
fn resolve_order_by_index(
1✔
3741
    expr: &Expr,
3742
    columns: &[String],
3743
    proj_items: &[ProjectionItem],
3744
) -> Result<usize> {
3745
    // Bare identifier — match against output names (alias-first).
3746
    let target_name: Option<String> = match expr {
1✔
3747
        Expr::Identifier(ident) => Some(ident.value.clone()),
1✔
3748
        Expr::CompoundIdentifier(parts) => parts.last().map(|p| p.value.clone()),
3✔
3749
        Expr::Function(_) => None,
1✔
3750
        Expr::Nested(inner) => return resolve_order_by_index(inner, columns, proj_items),
×
3751
        other => {
×
3752
            return Err(SQLRiteError::NotImplemented(format!(
×
3753
                "ORDER BY expression not supported on aggregating queries: {other:?}"
3754
            )));
3755
        }
3756
    };
3757
    if let Some(name) = target_name {
2✔
3758
        if let Some(i) = columns.iter().position(|c| c.eq_ignore_ascii_case(&name)) {
4✔
3759
            return Ok(i);
1✔
3760
        }
3761
        return Err(SQLRiteError::Internal(format!(
×
3762
            "ORDER BY references unknown column '{name}' in the SELECT output"
3763
        )));
3764
    }
3765
    // Function form: match by display name against any aggregate item
3766
    // whose canonical display equals the user's call. Tolerate case
3767
    // differences in the function name. SQLR-6 — a second pass with
3768
    // `t.` qualifiers stripped from both sides keeps qualifier
3769
    // spelling differences from blocking the match (`ORDER BY
3770
    // SUM(amount)` finds a `SELECT SUM(o.amount)` slot and vice
3771
    // versa), preserving the pre-qualifier behavior.
3772
    if let Expr::Function(func) = expr {
2✔
3773
        let user_disp = format_function_display(func, true);
1✔
3774
        for (i, item) in proj_items.iter().enumerate() {
2✔
3775
            if let ProjectionKind::Aggregate(call) = &item.kind
2✔
3776
                && call.display_name().eq_ignore_ascii_case(&user_disp)
1✔
3777
            {
3778
                return Ok(i);
1✔
3779
            }
3780
        }
3781
        let user_disp_unqualified = format_function_display(func, false);
1✔
3782
        for (i, item) in proj_items.iter().enumerate() {
2✔
3783
            if let ProjectionKind::Aggregate(call) = &item.kind
2✔
3784
                && call
2✔
3785
                    .display_name_unqualified()
1✔
3786
                    .eq_ignore_ascii_case(&user_disp_unqualified)
×
3787
            {
3788
                return Ok(i);
1✔
3789
            }
3790
        }
3791
        return Err(SQLRiteError::Internal(format!(
×
3792
            "ORDER BY references aggregate '{user_disp}' that isn't in the SELECT output"
3793
        )));
3794
    }
3795
    Err(SQLRiteError::Internal(
×
3796
        "ORDER BY expression could not be resolved against the output columns".to_string(),
×
3797
    ))
3798
}
3799

3800
/// Format a sqlparser function call into the same canonical form
3801
/// `AggregateCall::display_name()` uses, so ORDER BY on
3802
/// `COUNT(*)` / `SUM(salary)` matches its projection counterpart.
3803
/// `qualified` keeps or strips the argument's `t.` qualifier, matching
3804
/// `display_name()` / `display_name_unqualified()` respectively.
3805
fn format_function_display(func: &sqlparser::ast::Function, qualified: bool) -> String {
1✔
3806
    let name = match func.name.0.as_slice() {
2✔
3807
        [ObjectNamePart::Identifier(ident)] => ident.value.to_uppercase(),
2✔
3808
        _ => format!("{:?}", func.name).to_uppercase(),
×
3809
    };
3810
    let inner = match &func.args {
1✔
3811
        FunctionArguments::List(l) => {
1✔
3812
            let distinct = matches!(
1✔
3813
                l.duplicate_treatment,
1✔
3814
                Some(sqlparser::ast::DuplicateTreatment::Distinct)
3815
            );
3816
            let arg = l.args.first().map(|a| match a {
4✔
3817
                FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => "*".to_string(),
1✔
3818
                FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(i))) => i.value.clone(),
1✔
3819
                FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::CompoundIdentifier(parts))) => {
1✔
3820
                    if qualified {
1✔
3821
                        parts
2✔
3822
                            .iter()
1✔
3823
                            .map(|p| p.value.clone())
3✔
3824
                            .collect::<Vec<_>>()
1✔
3825
                            .join(".")
×
3826
                    } else {
3827
                        parts.last().map(|p| p.value.clone()).unwrap_or_default()
×
3828
                    }
3829
                }
3830
                _ => String::new(),
×
3831
            });
3832
            match (distinct, arg) {
2✔
3833
                (true, Some(a)) if a != "*" => format!("DISTINCT {a}"),
×
3834
                (_, Some(a)) => a,
1✔
3835
                _ => String::new(),
×
3836
            }
3837
        }
3838
        _ => String::new(),
×
3839
    };
3840
    format!("{name}({inner})")
2✔
3841
}
3842

3843
fn convert_literal(v: &sqlparser::ast::Value) -> Result<Value> {
1✔
3844
    use sqlparser::ast::Value as AstValue;
3845
    match v {
1✔
3846
        AstValue::Number(n, _) => {
1✔
3847
            if let Ok(i) = n.parse::<i64>() {
2✔
3848
                Ok(Value::Integer(i))
1✔
3849
            } else if let Ok(f) = n.parse::<f64>() {
2✔
3850
                Ok(Value::Real(f))
1✔
3851
            } else {
3852
                Err(SQLRiteError::Internal(format!(
×
3853
                    "could not parse numeric literal '{n}'"
3854
                )))
3855
            }
3856
        }
3857
        AstValue::SingleQuotedString(s) => Ok(Value::Text(s.clone())),
1✔
3858
        AstValue::Boolean(b) => Ok(Value::Bool(*b)),
1✔
3859
        AstValue::Null => Ok(Value::Null),
1✔
3860
        other => Err(SQLRiteError::NotImplemented(format!(
×
3861
            "unsupported literal value: {other:?}"
3862
        ))),
3863
    }
3864
}
3865

3866
#[cfg(test)]
3867
mod tests {
3868
    use super::*;
3869

3870
    // -----------------------------------------------------------------
3871
    // Phase 7b — Vector distance function math
3872
    // -----------------------------------------------------------------
3873

3874
    /// Float comparison helper — distance results need a small epsilon
3875
    /// because we accumulate sums across many f32 multiplies.
3876
    fn approx_eq(a: f32, b: f32, eps: f32) -> bool {
1✔
3877
        (a - b).abs() < eps
1✔
3878
    }
3879

3880
    #[test]
3881
    fn vec_distance_l2_identical_is_zero() {
3✔
3882
        let v = vec![0.1, 0.2, 0.3];
1✔
3883
        assert_eq!(vec_distance_l2(&v, &v), 0.0);
2✔
3884
    }
3885

3886
    #[test]
3887
    fn vec_distance_l2_unit_basis_is_sqrt2() {
3✔
3888
        // [1, 0] vs [0, 1]: distance = √((1-0)² + (0-1)²) = √2 ≈ 1.414
3889
        let a = vec![1.0, 0.0];
1✔
3890
        let b = vec![0.0, 1.0];
2✔
3891
        assert!(approx_eq(vec_distance_l2(&a, &b), 2.0_f32.sqrt(), 1e-6));
2✔
3892
    }
3893

3894
    #[test]
3895
    fn vec_distance_l2_known_value() {
4✔
3896
        // [0, 0, 0] vs [3, 4, 0]: √(9 + 16 + 0) = 5 (the classic 3-4-5 triangle).
3897
        let a = vec![0.0, 0.0, 0.0];
1✔
3898
        let b = vec![3.0, 4.0, 0.0];
2✔
3899
        assert!(approx_eq(vec_distance_l2(&a, &b), 5.0, 1e-6));
2✔
3900
    }
3901

3902
    #[test]
3903
    fn vec_distance_cosine_identical_is_zero() {
3✔
3904
        let v = vec![0.1, 0.2, 0.3];
1✔
3905
        let d = vec_distance_cosine(&v, &v).unwrap();
2✔
3906
        assert!(approx_eq(d, 0.0, 1e-6), "cos(v,v) = {d}, expected ≈ 0");
1✔
3907
    }
3908

3909
    #[test]
3910
    fn vec_distance_cosine_orthogonal_is_one() {
3✔
3911
        // Two orthogonal unit vectors should have cosine distance = 1.0
3912
        // (cosine similarity = 0 → distance = 1 - 0 = 1).
3913
        let a = vec![1.0, 0.0];
1✔
3914
        let b = vec![0.0, 1.0];
2✔
3915
        assert!(approx_eq(vec_distance_cosine(&a, &b).unwrap(), 1.0, 1e-6));
2✔
3916
    }
3917

3918
    #[test]
3919
    fn vec_distance_cosine_opposite_is_two() {
3✔
3920
        // a and -a have cosine similarity = -1 → distance = 1 - (-1) = 2.
3921
        let a = vec![1.0, 0.0, 0.0];
1✔
3922
        let b = vec![-1.0, 0.0, 0.0];
2✔
3923
        assert!(approx_eq(vec_distance_cosine(&a, &b).unwrap(), 2.0, 1e-6));
2✔
3924
    }
3925

3926
    #[test]
3927
    fn vec_distance_cosine_zero_magnitude_errors() {
3✔
3928
        // Cosine is undefined for the zero vector — error rather than NaN.
3929
        let a = vec![0.0, 0.0];
1✔
3930
        let b = vec![1.0, 0.0];
2✔
3931
        let err = vec_distance_cosine(&a, &b).unwrap_err();
2✔
3932
        assert!(format!("{err}").contains("zero-magnitude"));
2✔
3933
    }
3934

3935
    #[test]
3936
    fn vec_distance_dot_negates() {
4✔
3937
        // a·b = 1*4 + 2*5 + 3*6 = 32. Negated → -32.
3938
        let a = vec![1.0, 2.0, 3.0];
1✔
3939
        let b = vec![4.0, 5.0, 6.0];
2✔
3940
        assert!(approx_eq(vec_distance_dot(&a, &b), -32.0, 1e-6));
2✔
3941
    }
3942

3943
    #[test]
3944
    fn vec_distance_dot_orthogonal_is_zero() {
3✔
3945
        // Orthogonal vectors have dot product 0 → negated is also 0.
3946
        let a = vec![1.0, 0.0];
1✔
3947
        let b = vec![0.0, 1.0];
2✔
3948
        assert_eq!(vec_distance_dot(&a, &b), 0.0);
2✔
3949
    }
3950

3951
    #[test]
3952
    fn vec_distance_dot_unit_norm_matches_cosine_minus_one() {
3✔
3953
        // For unit-norm vectors: dot(a,b) = cos(a,b)
3954
        // → -dot(a,b) = -cos(a,b) = (1 - cos(a,b)) - 1 = vec_distance_cosine(a,b) - 1.
3955
        // Useful sanity check that the two functions agree on unit vectors.
3956
        let a = vec![0.6f32, 0.8]; // unit norm: √(0.36+0.64) = 1
1✔
3957
        let b = vec![0.8f32, 0.6]; // unit norm too
2✔
3958
        let dot = vec_distance_dot(&a, &b);
2✔
3959
        let cos = vec_distance_cosine(&a, &b).unwrap();
1✔
3960
        assert!(approx_eq(dot, cos - 1.0, 1e-5));
1✔
3961
    }
3962

3963
    // -----------------------------------------------------------------
3964
    // Phase 7c — bounded-heap top-k correctness + benchmark
3965
    // -----------------------------------------------------------------
3966

3967
    use crate::sql::db::database::Database;
3968
    use crate::sql::dialect::SqlriteDialect;
3969
    use crate::sql::parser::select::SelectQuery;
3970
    use sqlparser::parser::Parser;
3971

3972
    /// Builds a `docs(id INTEGER PK, score REAL)` table with N rows of
3973
    /// distinct positive scores so top-k tests aren't sensitive to
3974
    /// tie-breaking (heap is unstable; full-sort is stable; we want
3975
    /// both to agree without arguing about equal-score row order).
3976
    ///
3977
    /// **Why positive scores:** the INSERT parser doesn't currently
3978
    /// handle `Expr::UnaryOp(Minus, …)` for negative number literals
3979
    /// (it would parse `-3.14` as a unary expression and the value
3980
    /// extractor would skip it). That's a pre-existing bug, out of
3981
    /// scope for 7c. Using the Knuth multiplicative hash gives us
3982
    /// distinct positive scrambled values without dancing around the
3983
    /// negative-literal limitation.
3984
    fn seed_score_table(n: usize) -> Database {
1✔
3985
        let mut db = Database::new("tempdb".to_string());
1✔
3986
        crate::sql::process_command(
3987
            "CREATE TABLE docs (id INTEGER PRIMARY KEY, score REAL);",
3988
            &mut db,
3989
        )
3990
        .expect("create");
3991
        for i in 0..n {
1✔
3992
            // Knuth multiplicative hash mod 1_000_000 — distinct,
3993
            // dense in [0, 999_999], no collisions for n up to ~tens
3994
            // of thousands.
3995
            let score = ((i as u64).wrapping_mul(2_654_435_761) % 1_000_000) as f64;
2✔
3996
            let sql = format!("INSERT INTO docs (score) VALUES ({score});");
1✔
3997
            crate::sql::process_command(&sql, &mut db).expect("insert");
2✔
3998
        }
3999
        db
1✔
4000
    }
4001

4002
    /// Helper: parses an SQL SELECT into a SelectQuery so we can drive
4003
    /// `select_topk` / `sort_rowids` directly without the rest of the
4004
    /// process_command pipeline.
4005
    fn parse_select(sql: &str) -> SelectQuery {
1✔
4006
        let dialect = SqlriteDialect::new();
1✔
4007
        let mut ast = Parser::parse_sql(&dialect, sql).expect("parse");
1✔
4008
        let stmt = ast.pop().expect("one statement");
2✔
4009
        SelectQuery::new(&stmt).expect("select-query")
2✔
4010
    }
4011

4012
    #[test]
4013
    fn topk_matches_full_sort_asc() {
3✔
4014
        // Build N=200, top-k=10. Bounded heap output must equal
4015
        // full-sort-then-truncate output (both produce ASC order).
4016
        let db = seed_score_table(200);
1✔
4017
        let table = db.get_table("docs".to_string()).unwrap();
2✔
4018
        let q = parse_select("SELECT * FROM docs ORDER BY score ASC LIMIT 10;");
1✔
4019
        let order = q.order_by.as_ref().unwrap();
2✔
4020
        let all_rowids = table.rowids();
1✔
4021

4022
        // Full-sort path
4023
        let mut full = all_rowids.clone();
1✔
4024
        sort_rowids(&mut full, table, order).unwrap();
2✔
4025
        full.truncate(10);
1✔
4026

4027
        // Bounded-heap path
4028
        let topk = select_topk(&all_rowids, table, order, 10).unwrap();
1✔
4029

4030
        assert_eq!(topk, full, "top-k via heap should match full-sort+truncate");
2✔
4031
    }
4032

4033
    #[test]
4034
    fn topk_matches_full_sort_desc() {
3✔
4035
        // Same with DESC — verifies the direction-aware Ord wrapper.
4036
        let db = seed_score_table(200);
1✔
4037
        let table = db.get_table("docs".to_string()).unwrap();
2✔
4038
        let q = parse_select("SELECT * FROM docs ORDER BY score DESC LIMIT 10;");
1✔
4039
        let order = q.order_by.as_ref().unwrap();
2✔
4040
        let all_rowids = table.rowids();
1✔
4041

4042
        let mut full = all_rowids.clone();
1✔
4043
        sort_rowids(&mut full, table, order).unwrap();
2✔
4044
        full.truncate(10);
1✔
4045

4046
        let topk = select_topk(&all_rowids, table, order, 10).unwrap();
1✔
4047

4048
        assert_eq!(
2✔
4049
            topk, full,
4050
            "top-k DESC via heap should match full-sort+truncate"
4051
        );
4052
    }
4053

4054
    #[test]
4055
    fn topk_k_larger_than_n_returns_everything_sorted() {
3✔
4056
        // The executor branches off to the full-sort path when k >= N,
4057
        // but if a caller invokes select_topk directly with k > N, it
4058
        // should still produce all-sorted output (no truncation
4059
        // because we don't have N items to truncate to k).
4060
        let db = seed_score_table(50);
1✔
4061
        let table = db.get_table("docs".to_string()).unwrap();
2✔
4062
        let q = parse_select("SELECT * FROM docs ORDER BY score ASC LIMIT 1000;");
1✔
4063
        let order = q.order_by.as_ref().unwrap();
2✔
4064
        let topk = select_topk(&table.rowids(), table, order, 1000).unwrap();
1✔
4065
        assert_eq!(topk.len(), 50);
1✔
4066
        // All scores in ascending order.
4067
        let scores: Vec<f64> = topk
1✔
4068
            .iter()
4069
            .filter_map(|r| match table.get_value("score", *r) {
3✔
4070
                Some(Value::Real(f)) => Some(f),
1✔
4071
                _ => None,
×
4072
            })
4073
            .collect();
4074
        assert!(scores.windows(2).all(|w| w[0] <= w[1]));
4✔
4075
    }
4076

4077
    #[test]
4078
    fn topk_k_zero_returns_empty() {
3✔
4079
        let db = seed_score_table(10);
1✔
4080
        let table = db.get_table("docs".to_string()).unwrap();
2✔
4081
        let q = parse_select("SELECT * FROM docs ORDER BY score ASC LIMIT 1;");
1✔
4082
        let order = q.order_by.as_ref().unwrap();
2✔
4083
        let topk = select_topk(&table.rowids(), table, order, 0).unwrap();
1✔
4084
        assert!(topk.is_empty());
1✔
4085
    }
4086

4087
    #[test]
4088
    fn topk_empty_input_returns_empty() {
3✔
4089
        let db = seed_score_table(0);
1✔
4090
        let table = db.get_table("docs".to_string()).unwrap();
2✔
4091
        let q = parse_select("SELECT * FROM docs ORDER BY score ASC LIMIT 5;");
1✔
4092
        let order = q.order_by.as_ref().unwrap();
2✔
4093
        let topk = select_topk(&[], table, order, 5).unwrap();
1✔
4094
        assert!(topk.is_empty());
2✔
4095
    }
4096

4097
    #[test]
4098
    fn topk_works_through_select_executor_with_distance_function() {
3✔
4099
        // Integration check that the executor actually picks the
4100
        // bounded-heap path on a KNN-shaped query and produces the
4101
        // correct top-k.
4102
        let mut db = Database::new("tempdb".to_string());
1✔
4103
        crate::sql::process_command(
4104
            "CREATE TABLE docs (id INTEGER PRIMARY KEY, e VECTOR(2));",
4105
            &mut db,
4106
        )
4107
        .unwrap();
4108
        // Five rows with distinct distances from probe [1.0, 0.0]:
4109
        //   id=1 [1.0, 0.0]   distance=0
4110
        //   id=2 [2.0, 0.0]   distance=1
4111
        //   id=3 [0.0, 3.0]   distance=√(1+9) = √10 ≈ 3.16
4112
        //   id=4 [1.0, 4.0]   distance=4
4113
        //   id=5 [10.0, 10.0] distance=√(81+100) ≈ 13.45
4114
        for v in &[
1✔
4115
            "[1.0, 0.0]",
4116
            "[2.0, 0.0]",
4117
            "[0.0, 3.0]",
4118
            "[1.0, 4.0]",
4119
            "[10.0, 10.0]",
4120
        ] {
4121
            crate::sql::process_command(&format!("INSERT INTO docs (e) VALUES ({v});"), &mut db)
3✔
4122
                .unwrap();
4123
        }
4124
        let resp = crate::sql::process_command(
4125
            "SELECT id FROM docs ORDER BY vec_distance_l2(e, [1.0, 0.0]) ASC LIMIT 3;",
4126
            &mut db,
4127
        )
4128
        .unwrap();
4129
        // Top-3 closest to [1.0, 0.0] are id=1, id=2, id=3 (in that order).
4130
        // The status message tells us how many rows came back.
4131
        assert!(resp.contains("3 rows returned"), "got: {resp}");
2✔
4132
    }
4133

4134
    /// Manual benchmark — not run by default. Recommended invocation:
4135
    ///
4136
    ///     cargo test -p sqlrite-engine --lib topk_benchmark --release \
4137
    ///         -- --ignored --nocapture
4138
    ///
4139
    /// (`--release` matters: Rust's optimized sort gets very fast under
4140
    /// optimization, so the heap's relative advantage is best observed
4141
    /// against a sort that's also been optimized.)
4142
    ///
4143
    /// Measured numbers on an Apple Silicon laptop with N=10_000 + k=10:
4144
    ///   - bounded heap:    ~820µs
4145
    ///   - full sort+trunc: ~1.5ms
4146
    ///   - ratio:           ~1.8×
4147
    ///
4148
    /// The advantage is real but moderate at this size because the sort
4149
    /// key here is a single REAL column read (cheap) and Rust's sort_by
4150
    /// has a very low constant factor. The asymptotic O(N log k) vs
4151
    /// O(N log N) advantage scales with N and with per-row work — KNN
4152
    /// queries where the sort key is `vec_distance_l2(col, [...])` are
4153
    /// where this path really pays off, because each key evaluation is
4154
    /// itself O(dim) and the heap path skips the per-row evaluation
4155
    /// in the comparator (see `sort_rowids` for the contrast).
4156
    #[test]
4157
    #[ignore]
4158
    fn topk_benchmark() {
4159
        use std::time::Instant;
4160
        const N: usize = 10_000;
4161
        const K: usize = 10;
4162

4163
        let db = seed_score_table(N);
4164
        let table = db.get_table("docs".to_string()).unwrap();
4165
        let q = parse_select("SELECT * FROM docs ORDER BY score ASC LIMIT 10;");
4166
        let order = q.order_by.as_ref().unwrap();
4167
        let all_rowids = table.rowids();
4168

4169
        // Time bounded heap.
4170
        let t0 = Instant::now();
4171
        let _topk = select_topk(&all_rowids, table, order, K).unwrap();
4172
        let heap_dur = t0.elapsed();
4173

4174
        // Time full sort + truncate.
4175
        let t1 = Instant::now();
4176
        let mut full = all_rowids.clone();
4177
        sort_rowids(&mut full, table, order).unwrap();
4178
        full.truncate(K);
4179
        let sort_dur = t1.elapsed();
4180

4181
        let ratio = sort_dur.as_secs_f64() / heap_dur.as_secs_f64().max(1e-9);
4182
        println!("\n--- topk_benchmark (N={N}, k={K}) ---");
4183
        println!("  bounded heap:   {heap_dur:?}");
4184
        println!("  full sort+trunc: {sort_dur:?}");
4185
        println!("  speedup ratio:  {ratio:.2}×");
4186

4187
        // Soft assertion. Floor is 1.4× because the cheap-key
4188
        // benchmark hovers around 1.8× empirically; setting this too
4189
        // close to the measured value risks flaky CI on slower
4190
        // runners. Floor of 1.4× still catches an actual regression
4191
        // (e.g., if select_topk became O(N²) or stopped using the
4192
        // heap entirely).
4193
        assert!(
4194
            ratio > 1.4,
4195
            "bounded heap should be substantially faster than full sort, but ratio = {ratio:.2}"
4196
        );
4197
    }
4198

4199
    // ---------------------------------------------------------------------
4200
    // SQLR-7 — IS NULL / IS NOT NULL
4201
    // ---------------------------------------------------------------------
4202

4203
    /// Helper for IS NULL tests: run a SELECT through process_command and
4204
    /// return the rendered table as a String so the test can assert on the
4205
    /// row-count line without re-implementing the executor.
4206
    fn run_select(db: &mut Database, sql: &str) -> String {
1✔
4207
        crate::sql::process_command(sql, db).expect("select")
1✔
4208
    }
4209

4210
    #[test]
4211
    fn where_is_null_returns_null_rows() {
3✔
4212
        let mut db = Database::new("t".to_string());
1✔
4213
        crate::sql::process_command(
4214
            "CREATE TABLE t (id INTEGER PRIMARY KEY, n INTEGER);",
4215
            &mut db,
4216
        )
4217
        .unwrap();
4218
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (1, 10);", &mut db).unwrap();
1✔
4219
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (2, NULL);", &mut db).unwrap();
1✔
4220
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (3, 30);", &mut db).unwrap();
1✔
4221
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (4, NULL);", &mut db).unwrap();
1✔
4222

4223
        let response = run_select(&mut db, "SELECT id FROM t WHERE n IS NULL;");
1✔
4224
        assert!(
×
4225
            response.contains("2 rows returned"),
2✔
4226
            "IS NULL should return 2 rows, got: {response}"
4227
        );
4228
    }
4229

4230
    #[test]
4231
    fn where_is_not_null_returns_non_null_rows() {
3✔
4232
        let mut db = Database::new("t".to_string());
1✔
4233
        crate::sql::process_command(
4234
            "CREATE TABLE t (id INTEGER PRIMARY KEY, n INTEGER);",
4235
            &mut db,
4236
        )
4237
        .unwrap();
4238
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (1, 10);", &mut db).unwrap();
1✔
4239
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (2, NULL);", &mut db).unwrap();
1✔
4240
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (3, 30);", &mut db).unwrap();
1✔
4241

4242
        let response = run_select(&mut db, "SELECT id FROM t WHERE n IS NOT NULL;");
1✔
4243
        assert!(
×
4244
            response.contains("2 rows returned"),
2✔
4245
            "IS NOT NULL should return 2 rows, got: {response}"
4246
        );
4247
    }
4248

4249
    #[test]
4250
    fn where_is_null_on_indexed_column() {
3✔
4251
        // UNIQUE on a TEXT column gets an automatic secondary index.
4252
        // NULLs aren't stored in the index, so IS NULL falls through to
4253
        // a full scan via select_rowids — verify the full-scan path is
4254
        // still correct.
4255
        let mut db = Database::new("t".to_string());
1✔
4256
        crate::sql::process_command(
4257
            "CREATE TABLE t (id INTEGER PRIMARY KEY, name TEXT UNIQUE);",
4258
            &mut db,
4259
        )
4260
        .unwrap();
4261
        crate::sql::process_command("INSERT INTO t (id, name) VALUES (1, 'alice');", &mut db)
1✔
4262
            .unwrap();
4263
        crate::sql::process_command("INSERT INTO t (id, name) VALUES (2, NULL);", &mut db).unwrap();
1✔
4264
        crate::sql::process_command("INSERT INTO t (id, name) VALUES (3, 'bob');", &mut db)
1✔
4265
            .unwrap();
4266

4267
        let null_rows = run_select(&mut db, "SELECT id FROM t WHERE name IS NULL;");
1✔
4268
        assert!(
×
4269
            null_rows.contains("1 row returned"),
2✔
4270
            "indexed IS NULL should return 1 row, got: {null_rows}"
4271
        );
4272
        let not_null_rows = run_select(&mut db, "SELECT id FROM t WHERE name IS NOT NULL;");
1✔
4273
        assert!(
×
4274
            not_null_rows.contains("2 rows returned"),
2✔
4275
            "indexed IS NOT NULL should return 2 rows, got: {not_null_rows}"
4276
        );
4277
    }
4278

4279
    #[test]
4280
    fn where_is_null_works_on_omitted_column() {
3✔
4281
        // No DEFAULT, column missing from the INSERT column list — the
4282
        // BTreeMap entry never gets written, get_value returns None,
4283
        // eval_expr maps that to Value::Null, and IS NULL matches.
4284
        let mut db = Database::new("t".to_string());
1✔
4285
        crate::sql::process_command(
4286
            "CREATE TABLE t (id INTEGER PRIMARY KEY, qty INTEGER, label TEXT);",
4287
            &mut db,
4288
        )
4289
        .unwrap();
4290
        crate::sql::process_command(
4291
            "INSERT INTO t (id, qty, label) VALUES (1, 7, 'a');",
4292
            &mut db,
4293
        )
4294
        .unwrap();
4295
        // qty omitted on row 2.
4296
        crate::sql::process_command("INSERT INTO t (id, label) VALUES (2, 'b');", &mut db).unwrap();
1✔
4297

4298
        let response = run_select(&mut db, "SELECT id FROM t WHERE qty IS NULL;");
1✔
4299
        assert!(
×
4300
            response.contains("1 row returned"),
2✔
4301
            "IS NULL should match the omitted-column row, got: {response}"
4302
        );
4303
    }
4304

4305
    // ---------------------------------------------------------------------
4306
    // SQLR-2 — unknown columns error in single-table scope, matching
4307
    // JoinedScope. Before the fix, lookup silently returned NULL, so a
4308
    // typo'd WHERE matched every row (catastrophic for UPDATE/DELETE).
4309
    // ---------------------------------------------------------------------
4310

4311
    /// Seed a two-row table the SQLR-2 tests share.
4312
    fn seed_sqlr2() -> Database {
1✔
4313
        let mut db = Database::new("t".to_string());
1✔
4314
        crate::sql::process_command(
4315
            "CREATE TABLE t (id INTEGER PRIMARY KEY, name TEXT);",
4316
            &mut db,
4317
        )
4318
        .unwrap();
4319
        crate::sql::process_command("INSERT INTO t (id, name) VALUES (1, 'alice');", &mut db)
1✔
4320
            .unwrap();
4321
        crate::sql::process_command("INSERT INTO t (id, name) VALUES (2, 'bob');", &mut db)
1✔
4322
            .unwrap();
4323
        db
1✔
4324
    }
4325

4326
    #[test]
4327
    fn where_unknown_column_errors_single_table() {
3✔
4328
        let mut db = seed_sqlr2();
1✔
4329
        let res = crate::sql::process_command("SELECT id FROM t WHERE typo IS NULL;", &mut db);
1✔
4330
        let err = res.expect_err("WHERE on an unknown column must error, not match via NULL");
1✔
NEW
4331
        assert!(
×
4332
            err.to_string().contains("does not exist"),
3✔
4333
            "expected unknown-column error, got: {err}"
4334
        );
4335
    }
4336

4337
    #[test]
4338
    fn order_by_unknown_column_errors_single_table() {
3✔
4339
        let mut db = seed_sqlr2();
1✔
4340
        let res = crate::sql::process_command("SELECT id FROM t ORDER BY typo;", &mut db);
1✔
NEW
4341
        assert!(
×
4342
            res.is_err(),
2✔
4343
            "ORDER BY on an unknown column must error, not sort by NULL"
4344
        );
4345
    }
4346

4347
    #[test]
4348
    fn update_with_unknown_column_in_where_errors_and_mutates_nothing() {
3✔
4349
        let mut db = seed_sqlr2();
1✔
4350
        let res =
1✔
4351
            crate::sql::process_command("UPDATE t SET name = 'x' WHERE typo IS NULL;", &mut db);
NEW
4352
        assert!(
×
4353
            res.is_err(),
2✔
4354
            "UPDATE with a typo'd WHERE column must error, not update every row"
4355
        );
4356
        let rows = run_select(&mut db, "SELECT id FROM t WHERE name = 'x';");
1✔
NEW
4357
        assert!(
×
4358
            rows.contains("0 rows returned"),
2✔
4359
            "no row may be updated when the WHERE errors, got: {rows}"
4360
        );
4361
    }
4362

4363
    #[test]
4364
    fn delete_with_unknown_column_in_where_errors_and_deletes_nothing() {
3✔
4365
        let mut db = seed_sqlr2();
1✔
4366
        let res = crate::sql::process_command("DELETE FROM t WHERE typo IS NULL;", &mut db);
1✔
NEW
4367
        assert!(
×
4368
            res.is_err(),
2✔
4369
            "DELETE with a typo'd WHERE column must error, not delete every row"
4370
        );
4371
        let rows = run_select(&mut db, "SELECT id FROM t;");
1✔
NEW
4372
        assert!(
×
4373
            rows.contains("2 rows returned"),
2✔
4374
            "no row may be deleted when the WHERE errors, got: {rows}"
4375
        );
4376
    }
4377

4378
    #[test]
4379
    fn where_is_null_combines_with_and_or() {
3✔
4380
        // Sanity check that the new arms compose with the existing
4381
        // boolean operators in eval_expr — `n IS NULL AND id > 1`
4382
        // should narrow correctly.
4383
        let mut db = Database::new("t".to_string());
1✔
4384
        crate::sql::process_command(
4385
            "CREATE TABLE t (id INTEGER PRIMARY KEY, n INTEGER);",
4386
            &mut db,
4387
        )
4388
        .unwrap();
4389
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (1, NULL);", &mut db).unwrap();
1✔
4390
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (2, NULL);", &mut db).unwrap();
1✔
4391
        crate::sql::process_command("INSERT INTO t (id, n) VALUES (3, 30);", &mut db).unwrap();
1✔
4392

4393
        let response = run_select(&mut db, "SELECT id FROM t WHERE n IS NULL AND id > 1;");
1✔
4394
        assert!(
×
4395
            response.contains("1 row returned"),
2✔
4396
            "IS NULL combined with AND should match exactly row 2, got: {response}"
4397
        );
4398
    }
4399

4400
    // ---------------------------------------------------------------------
4401
    // SQLR-3 — LIKE / IN / DISTINCT / GROUP BY / aggregates
4402
    // ---------------------------------------------------------------------
4403

4404
    /// Seed a small employees table the analytical tests share.
4405
    fn seed_employees() -> Database {
1✔
4406
        let mut db = Database::new("t".to_string());
1✔
4407
        crate::sql::process_command(
4408
            "CREATE TABLE emp (id INTEGER PRIMARY KEY, name TEXT, dept TEXT, salary INTEGER);",
4409
            &mut db,
4410
        )
4411
        .unwrap();
4412
        let rows = [
1✔
4413
            "INSERT INTO emp (name, dept, salary) VALUES ('Alice', 'eng', 100);",
4414
            "INSERT INTO emp (name, dept, salary) VALUES ('alex',  'eng', 120);",
4415
            "INSERT INTO emp (name, dept, salary) VALUES ('Bob',   'eng', 100);",
4416
            "INSERT INTO emp (name, dept, salary) VALUES ('Carol', 'sales', 90);",
4417
            "INSERT INTO emp (name, dept, salary) VALUES ('Dave',  'sales', NULL);",
4418
            "INSERT INTO emp (name, dept, salary) VALUES ('Eve',   'ops', 80);",
4419
        ];
4420
        for sql in rows {
2✔
4421
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
4422
        }
4423
        db
1✔
4424
    }
4425

4426
    /// Drive `execute_select_rows` directly so tests can assert on typed values.
4427
    fn run_rows(db: &Database, sql: &str) -> SelectResult {
1✔
4428
        let q = parse_select(sql);
1✔
4429
        execute_select_rows(q, db).expect("select")
1✔
4430
    }
4431

4432
    // ----- LIKE -----
4433

4434
    #[test]
4435
    fn like_percent_prefix_case_insensitive() {
3✔
4436
        let db = seed_employees();
1✔
4437
        let r = run_rows(&db, "SELECT name FROM emp WHERE name LIKE 'a%';");
1✔
4438
        // Matches Alice and alex (case-insensitive ASCII).
4439
        let names: Vec<_> = r.rows.iter().map(|r| r[0].to_display_string()).collect();
4✔
4440
        assert_eq!(names.len(), 2, "expected 2 rows, got {names:?}");
2✔
4441
        assert!(names.contains(&"Alice".to_string()));
2✔
4442
        assert!(names.contains(&"alex".to_string()));
1✔
4443
    }
4444

4445
    #[test]
4446
    fn like_underscore_singlechar() {
3✔
4447
        let db = seed_employees();
1✔
4448
        let r = run_rows(&db, "SELECT name FROM emp WHERE name LIKE '_ve';");
1✔
4449
        // Eve matches; alex does not (3 chars vs 4).
4450
        let names: Vec<_> = r.rows.iter().map(|r| r[0].to_display_string()).collect();
4✔
4451
        assert_eq!(names, vec!["Eve".to_string()]);
2✔
4452
    }
4453

4454
    #[test]
4455
    fn not_like_excludes_match() {
3✔
4456
        let db = seed_employees();
1✔
4457
        let r = run_rows(&db, "SELECT name FROM emp WHERE name NOT LIKE 'a%';");
1✔
4458
        // Excludes Alice + alex; 4 rows remain.
4459
        assert_eq!(r.rows.len(), 4);
2✔
4460
    }
4461

4462
    #[test]
4463
    fn like_with_null_excludes_row() {
3✔
4464
        let db = seed_employees();
1✔
4465
        // Match 'sales' rows where salary is NULL → just Dave.
4466
        let r = run_rows(
4467
            &db,
4468
            "SELECT name FROM emp WHERE dept LIKE 'sales' AND salary IS NULL;",
4469
        );
4470
        assert_eq!(r.rows.len(), 1);
2✔
4471
        assert_eq!(r.rows[0][0].to_display_string(), "Dave");
1✔
4472
    }
4473

4474
    // ----- IN -----
4475

4476
    #[test]
4477
    fn in_list_positive() {
3✔
4478
        let db = seed_employees();
1✔
4479
        let r = run_rows(&db, "SELECT name FROM emp WHERE id IN (1, 3, 5);");
1✔
4480
        let names: Vec<_> = r.rows.iter().map(|r| r[0].to_display_string()).collect();
4✔
4481
        assert_eq!(names.len(), 3);
2✔
4482
        assert!(names.contains(&"Alice".to_string()));
1✔
4483
        assert!(names.contains(&"Bob".to_string()));
1✔
4484
        assert!(names.contains(&"Dave".to_string()));
1✔
4485
    }
4486

4487
    #[test]
4488
    fn not_in_excludes_listed() {
3✔
4489
        let db = seed_employees();
1✔
4490
        let r = run_rows(&db, "SELECT name FROM emp WHERE id NOT IN (1, 2);");
1✔
4491
        // 6 rows total - 2 excluded = 4.
4492
        assert_eq!(r.rows.len(), 4);
2✔
4493
    }
4494

4495
    #[test]
4496
    fn in_list_with_null_three_valued() {
3✔
4497
        let db = seed_employees();
1✔
4498
        // x = 1 should match; for other rows the NULL in the list yields
4499
        // unknown → false in WHERE → excluded.
4500
        let r = run_rows(&db, "SELECT name FROM emp WHERE id IN (1, NULL);");
1✔
4501
        assert_eq!(r.rows.len(), 1);
2✔
4502
        assert_eq!(r.rows[0][0].to_display_string(), "Alice");
1✔
4503
    }
4504

4505
    // ----- DISTINCT -----
4506

4507
    #[test]
4508
    fn distinct_single_column() {
3✔
4509
        let db = seed_employees();
1✔
4510
        let r = run_rows(&db, "SELECT DISTINCT dept FROM emp;");
1✔
4511
        // 3 distinct depts: eng, sales, ops.
4512
        assert_eq!(r.rows.len(), 3);
2✔
4513
    }
4514

4515
    #[test]
4516
    fn distinct_multi_column_with_null() {
3✔
4517
        let db = seed_employees();
1✔
4518
        // (dept, salary) tuples — the two 'eng' / 100 rows collapse.
4519
        let r = run_rows(&db, "SELECT DISTINCT dept, salary FROM emp;");
1✔
4520
        // 6 input rows; (eng, 100) appears twice → 5 distinct tuples.
4521
        assert_eq!(r.rows.len(), 5);
2✔
4522
    }
4523

4524
    // ----- Aggregates without GROUP BY -----
4525

4526
    #[test]
4527
    fn count_star_no_groupby() {
3✔
4528
        let db = seed_employees();
1✔
4529
        let r = run_rows(&db, "SELECT COUNT(*) FROM emp;");
1✔
4530
        assert_eq!(r.rows.len(), 1);
2✔
4531
        assert_eq!(r.rows[0][0], Value::Integer(6));
1✔
4532
    }
4533

4534
    #[test]
4535
    fn count_col_skips_nulls() {
3✔
4536
        let db = seed_employees();
1✔
4537
        let r = run_rows(&db, "SELECT COUNT(salary) FROM emp;");
1✔
4538
        // 6 rows, 1 NULL salary → COUNT(salary) = 5.
4539
        assert_eq!(r.rows[0][0], Value::Integer(5));
2✔
4540
    }
4541

4542
    #[test]
4543
    fn count_distinct_dedupes_and_skips_nulls() {
3✔
4544
        let db = seed_employees();
1✔
4545
        let r = run_rows(&db, "SELECT COUNT(DISTINCT salary) FROM emp;");
1✔
4546
        // Distinct non-null salaries: {100, 120, 90, 80} → 4.
4547
        assert_eq!(r.rows[0][0], Value::Integer(4));
2✔
4548
    }
4549

4550
    #[test]
4551
    fn sum_int_stays_integer() {
3✔
4552
        let db = seed_employees();
1✔
4553
        let r = run_rows(&db, "SELECT SUM(salary) FROM emp;");
1✔
4554
        // 100 + 120 + 100 + 90 + 80 = 490 (NULL skipped).
4555
        assert_eq!(r.rows[0][0], Value::Integer(490));
2✔
4556
    }
4557

4558
    #[test]
4559
    fn avg_returns_real() {
3✔
4560
        let db = seed_employees();
1✔
4561
        let r = run_rows(&db, "SELECT AVG(salary) FROM emp;");
1✔
4562
        // 490 / 5 = 98.0
4563
        match &r.rows[0][0] {
2✔
4564
            Value::Real(v) => assert!((v - 98.0).abs() < 1e-9),
2✔
4565
            other => panic!("expected Real, got {other:?}"),
×
4566
        }
4567
    }
4568

4569
    #[test]
4570
    fn min_max_skip_nulls() {
3✔
4571
        let db = seed_employees();
1✔
4572
        let r = run_rows(&db, "SELECT MIN(salary), MAX(salary) FROM emp;");
1✔
4573
        assert_eq!(r.rows[0][0], Value::Integer(80));
2✔
4574
        assert_eq!(r.rows[0][1], Value::Integer(120));
1✔
4575
    }
4576

4577
    #[test]
4578
    fn aggregates_on_empty_table_emit_one_row() {
3✔
4579
        let mut db = Database::new("t".to_string());
1✔
4580
        crate::sql::process_command("CREATE TABLE t (x INTEGER);", &mut db).unwrap();
2✔
4581
        let r = run_rows(
4582
            &db,
4583
            "SELECT COUNT(*), SUM(x), AVG(x), MIN(x), MAX(x) FROM t;",
4584
        );
4585
        assert_eq!(r.rows.len(), 1);
2✔
4586
        assert_eq!(r.rows[0][0], Value::Integer(0));
1✔
4587
        assert_eq!(r.rows[0][1], Value::Null);
1✔
4588
        assert_eq!(r.rows[0][2], Value::Null);
1✔
4589
        assert_eq!(r.rows[0][3], Value::Null);
1✔
4590
        assert_eq!(r.rows[0][4], Value::Null);
1✔
4591
    }
4592

4593
    // ----- GROUP BY -----
4594

4595
    #[test]
4596
    fn group_by_single_col_with_count() {
3✔
4597
        let db = seed_employees();
1✔
4598
        let r = run_rows(&db, "SELECT dept, COUNT(*) FROM emp GROUP BY dept;");
1✔
4599
        assert_eq!(r.rows.len(), 3);
2✔
4600
        // Build a map for a stable assertion regardless of group order.
4601
        let mut by_dept: std::collections::HashMap<String, i64> = Default::default();
1✔
4602
        for row in &r.rows {
3✔
4603
            let d = row[0].to_display_string();
2✔
4604
            let c = match &row[1] {
2✔
4605
                Value::Integer(i) => *i,
1✔
4606
                v => panic!("expected Integer count, got {v:?}"),
×
4607
            };
4608
            by_dept.insert(d, c);
1✔
4609
        }
4610
        assert_eq!(by_dept["eng"], 3);
1✔
4611
        assert_eq!(by_dept["sales"], 2);
1✔
4612
        assert_eq!(by_dept["ops"], 1);
1✔
4613
    }
4614

4615
    #[test]
4616
    fn group_by_with_where_filter() {
3✔
4617
        let db = seed_employees();
1✔
4618
        let r = run_rows(
4619
            &db,
4620
            "SELECT dept, SUM(salary) FROM emp WHERE salary > 80 GROUP BY dept;",
4621
        );
4622
        // After WHERE, ops drops out (Eve = 80 excluded). eng has 3 rows
4623
        // contributing (100+120+100=320); sales has 1 (90; Dave NULL skipped).
4624
        let by: std::collections::HashMap<String, i64> = r
1✔
4625
            .rows
4626
            .iter()
4627
            .map(|row| {
2✔
4628
                (
4629
                    row[0].to_display_string(),
1✔
4630
                    match &row[1] {
2✔
4631
                        Value::Integer(i) => *i,
1✔
4632
                        v => panic!("expected Integer sum, got {v:?}"),
×
4633
                    },
4634
                )
4635
            })
4636
            .collect();
4637
        assert_eq!(by.len(), 2);
2✔
4638
        assert_eq!(by["eng"], 320);
1✔
4639
        assert_eq!(by["sales"], 90);
1✔
4640
    }
4641

4642
    #[test]
4643
    fn group_by_without_aggregates_is_distinct() {
3✔
4644
        let db = seed_employees();
1✔
4645
        let r = run_rows(&db, "SELECT dept FROM emp GROUP BY dept;");
1✔
4646
        assert_eq!(r.rows.len(), 3);
2✔
4647
    }
4648

4649
    #[test]
4650
    fn order_by_count_desc() {
3✔
4651
        let db = seed_employees();
1✔
4652
        let r = run_rows(
4653
            &db,
4654
            "SELECT dept, COUNT(*) AS n FROM emp GROUP BY dept ORDER BY n DESC LIMIT 2;",
4655
        );
4656
        assert_eq!(r.rows.len(), 2);
2✔
4657
        // Top group is 'eng' with 3.
4658
        assert_eq!(r.rows[0][0].to_display_string(), "eng");
1✔
4659
        assert_eq!(r.rows[0][1], Value::Integer(3));
1✔
4660
    }
4661

4662
    #[test]
4663
    fn order_by_aggregate_call_form() {
3✔
4664
        let db = seed_employees();
1✔
4665
        // No alias — ORDER BY references the aggregate by its display form.
4666
        let r = run_rows(
4667
            &db,
4668
            "SELECT dept, COUNT(*) FROM emp GROUP BY dept ORDER BY COUNT(*) DESC;",
4669
        );
4670
        assert_eq!(r.rows.len(), 3);
2✔
4671
        assert_eq!(r.rows[0][0].to_display_string(), "eng");
1✔
4672
    }
4673

4674
    #[test]
4675
    fn group_by_invalid_bare_column_errors() {
3✔
4676
        // `name` is neither aggregated nor in GROUP BY → must error at parse.
4677
        let mut db = Database::new("t".to_string());
1✔
4678
        crate::sql::process_command(
4679
            "CREATE TABLE t (id INTEGER PRIMARY KEY, dept TEXT, name TEXT);",
4680
            &mut db,
4681
        )
4682
        .unwrap();
4683
        let err = crate::sql::process_command("SELECT dept, name FROM t GROUP BY dept;", &mut db);
1✔
4684
        assert!(err.is_err(), "should reject bare 'name' not in GROUP BY");
2✔
4685
    }
4686

4687
    #[test]
4688
    fn aggregate_in_where_errors_friendly() {
3✔
4689
        let mut db = Database::new("t".to_string());
1✔
4690
        crate::sql::process_command("CREATE TABLE t (x INTEGER);", &mut db).unwrap();
2✔
4691
        crate::sql::process_command("INSERT INTO t (x) VALUES (1);", &mut db).unwrap();
1✔
4692
        let err = crate::sql::process_command("SELECT x FROM t WHERE COUNT(*) > 0;", &mut db);
1✔
4693
        assert!(err.is_err(), "aggregates must not be allowed in WHERE");
2✔
4694
    }
4695

4696
    // ---------------------------------------------------------------------
4697
    // SQLR-52 — HAVING (post-aggregation filter)
4698
    // ---------------------------------------------------------------------
4699
    //
4700
    // seed_employees groups: eng × 3 (salaries 100, 120, 100 → SUM 320),
4701
    // sales × 2 (90, NULL → SUM 90), ops × 1 (80). Groups materialize in
4702
    // first-occurrence order: eng, sales, ops.
4703

4704
    #[test]
4705
    fn having_count_filters_groups() {
3✔
4706
        let db = seed_employees();
1✔
4707
        let r = run_rows(
4708
            &db,
4709
            "SELECT dept, COUNT(*) FROM emp GROUP BY dept HAVING COUNT(*) > 1;",
4710
        );
4711
        // ops (1 member) drops; hidden HAVING slots must not leak into
4712
        // the output width.
4713
        assert_eq!(r.columns, vec!["dept".to_string(), "COUNT(*)".to_string()]);
2✔
4714
        let got: Vec<(String, i64)> = r
1✔
4715
            .rows
4716
            .iter()
4717
            .map(|row| (row[0].to_display_string(), expect_int(&row[1])))
3✔
4718
            .collect();
4719
        assert_eq!(got, vec![("eng".to_string(), 3), ("sales".to_string(), 2)]);
2✔
4720
    }
4721

4722
    #[test]
4723
    fn having_sum_threshold() {
3✔
4724
        let db = seed_employees();
1✔
4725
        let r = run_rows(
4726
            &db,
4727
            "SELECT dept, SUM(salary) FROM emp GROUP BY dept HAVING SUM(salary) > 100;",
4728
        );
4729
        assert_eq!(r.rows.len(), 1);
2✔
4730
        assert_eq!(r.rows[0][0].to_display_string(), "eng");
1✔
4731
        assert_eq!(r.rows[0][1], Value::Integer(320));
1✔
4732
    }
4733

4734
    #[test]
4735
    fn having_references_aggregate_alias() {
3✔
4736
        let db = seed_employees();
1✔
4737
        let r = run_rows(
4738
            &db,
4739
            "SELECT dept, SUM(salary) AS total FROM emp GROUP BY dept HAVING total > 100;",
4740
        );
4741
        assert_eq!(r.columns, vec!["dept".to_string(), "total".to_string()]);
2✔
4742
        assert_eq!(r.rows.len(), 1);
1✔
4743
        assert_eq!(r.rows[0][1], Value::Integer(320));
1✔
4744
    }
4745

4746
    #[test]
4747
    fn having_aggregate_not_in_projection() {
3✔
4748
        let db = seed_employees();
1✔
4749
        // COUNT(*) only exists in HAVING — computed via a hidden slot,
4750
        // stripped before output.
4751
        let r = run_rows(
4752
            &db,
4753
            "SELECT dept FROM emp GROUP BY dept HAVING COUNT(*) > 1;",
4754
        );
4755
        assert_eq!(r.columns, vec!["dept".to_string()]);
2✔
4756
        let depts: Vec<String> = r
1✔
4757
            .rows
4758
            .iter()
4759
            .map(|row| row[0].to_display_string())
3✔
4760
            .collect();
4761
        assert_eq!(depts, vec!["eng".to_string(), "sales".to_string()]);
2✔
4762
    }
4763

4764
    #[test]
4765
    fn having_group_key_not_in_projection() {
3✔
4766
        let db = seed_employees();
1✔
4767
        // dept only exists in GROUP BY + HAVING, not the SELECT list.
4768
        let r = run_rows(
4769
            &db,
4770
            "SELECT COUNT(*) FROM emp GROUP BY dept HAVING dept = 'eng';",
4771
        );
4772
        assert_eq!(r.columns, vec!["COUNT(*)".to_string()]);
2✔
4773
        assert_eq!(r.rows.len(), 1);
1✔
4774
        assert_eq!(r.rows[0][0], Value::Integer(3));
1✔
4775
    }
4776

4777
    #[test]
4778
    fn having_compound_and_predicate() {
3✔
4779
        let db = seed_employees();
1✔
4780
        let r = run_rows(
4781
            &db,
4782
            "SELECT dept FROM emp GROUP BY dept \
4783
             HAVING COUNT(*) > 1 AND SUM(salary) > 100;",
4784
        );
4785
        // eng passes both; sales passes COUNT but fails SUM (90).
4786
        assert_eq!(r.rows.len(), 1);
2✔
4787
        assert_eq!(r.rows[0][0].to_display_string(), "eng");
1✔
4788
    }
4789

4790
    #[test]
4791
    fn having_composes_with_order_by_and_limit() {
4✔
4792
        let db = seed_employees();
1✔
4793
        let r = run_rows(
4794
            &db,
4795
            "SELECT dept, COUNT(*) AS n FROM emp GROUP BY dept \
4796
             HAVING n >= 1 ORDER BY n DESC LIMIT 2;",
4797
        );
4798
        let got: Vec<(String, i64)> = r
1✔
4799
            .rows
4800
            .iter()
4801
            .map(|row| (row[0].to_display_string(), expect_int(&row[1])))
3✔
4802
            .collect();
4803
        assert_eq!(got, vec![("eng".to_string(), 3), ("sales".to_string(), 2)]);
2✔
4804
    }
4805

4806
    #[test]
4807
    fn having_can_exclude_every_group() {
3✔
4808
        let db = seed_employees();
1✔
4809
        let r = run_rows(
4810
            &db,
4811
            "SELECT dept FROM emp GROUP BY dept HAVING COUNT(*) > 99;",
4812
        );
4813
        assert_eq!(r.rows.len(), 0);
2✔
4814
    }
4815

4816
    #[test]
4817
    fn having_null_aggregate_collapses_to_false() {
3✔
4818
        let mut db = seed_employees();
1✔
4819
        // mkt's only salary is NULL → SUM(salary) is NULL → NULL > 0 is
4820
        // unknown → group excluded (NULL-as-false, same as WHERE).
4821
        crate::sql::process_command(
4822
            "INSERT INTO emp (name, dept, salary) VALUES ('Zoe', 'mkt', NULL);",
4823
            &mut db,
4824
        )
4825
        .unwrap();
4826
        let r = run_rows(
4827
            &db,
4828
            "SELECT dept FROM emp GROUP BY dept HAVING SUM(salary) > 0;",
4829
        );
4830
        let depts: Vec<String> = r
1✔
4831
            .rows
4832
            .iter()
4833
            .map(|row| row[0].to_display_string())
3✔
4834
            .collect();
4835
        assert_eq!(
1✔
4836
            depts,
4837
            vec!["eng".to_string(), "sales".to_string(), "ops".to_string()],
2✔
4838
            "mkt (all-NULL salaries) must be filtered out"
4839
        );
4840
    }
4841

4842
    #[test]
4843
    fn having_lowercase_function_form_matches() {
3✔
4844
        let db = seed_employees();
1✔
4845
        let r = run_rows(
4846
            &db,
4847
            "SELECT dept FROM emp GROUP BY dept HAVING count(*) > 1;",
4848
        );
4849
        assert_eq!(r.rows.len(), 2);
2✔
4850
    }
4851

4852
    #[test]
4853
    fn having_without_group_by_is_rejected() {
3✔
4854
        let mut db = seed_employees();
1✔
4855
        let err =
2✔
4856
            crate::sql::process_command("SELECT COUNT(*) FROM emp HAVING COUNT(*) > 0;", &mut db);
4857
        match err {
1✔
4858
            Err(SQLRiteError::NotImplemented(msg)) => assert!(
1✔
4859
                msg.contains("HAVING without GROUP BY"),
2✔
4860
                "unexpected message: {msg}"
4861
            ),
4862
            other => panic!("expected NotImplemented, got {other:?}"),
×
4863
        }
4864
    }
4865

4866
    #[test]
4867
    fn having_unknown_column_is_rejected() {
3✔
4868
        let mut db = seed_employees();
1✔
4869
        // `name` is neither a GROUP BY key nor an aggregate — typed error,
4870
        // not a silent NULL like the legacy single-table WHERE leniency.
4871
        let err = crate::sql::process_command(
4872
            "SELECT dept, COUNT(*) FROM emp GROUP BY dept HAVING name = 'Alice';",
4873
            &mut db,
4874
        );
4875
        match err {
1✔
4876
            Err(e) => {
1✔
4877
                let msg = e.to_string();
1✔
4878
                assert!(
×
4879
                    msg.contains("HAVING references"),
2✔
4880
                    "unexpected message: {msg}"
4881
                );
4882
            }
4883
            Ok(_) => panic!("HAVING on an out-of-scope column must error"),
×
4884
        }
4885
    }
4886

4887
    #[test]
4888
    fn having_over_join_filters_groups_for_all_flavors() {
3✔
4889
        // SQLR-6 — GROUP BY + HAVING compose with every join flavor.
4890
        // Only Alice has more than one order; the dangling order (RIGHT /
4891
        // FULL) groups under a NULL name with count 1 and is filtered.
4892
        for flavor in ["INNER", "LEFT OUTER", "RIGHT OUTER", "FULL OUTER"] {
2✔
4893
            let sql = format!(
2✔
4894
                "SELECT customers.name, COUNT(*) FROM customers \
4895
                 {flavor} JOIN orders ON customers.id = orders.customer_id \
4896
                 GROUP BY customers.name HAVING COUNT(*) > 1;"
4897
            );
4898
            let db = seed_join_fixture();
1✔
4899
            let r = run_rows(&db, &sql);
2✔
4900
            assert_eq!(r.rows.len(), 1, "{flavor}: only Alice has >1 order");
2✔
4901
            assert_eq!(r.rows[0][0].to_display_string(), "Alice", "{flavor}");
2✔
4902
            assert_eq!(expect_int(&r.rows[0][1]), 2, "{flavor}");
1✔
4903
        }
4904
    }
4905

4906
    /// Helper: unwrap an integer `Value` in HAVING tests.
4907
    fn expect_int(v: &Value) -> i64 {
1✔
4908
        match v {
1✔
4909
            Value::Integer(i) => *i,
1✔
4910
            other => panic!("expected integer value, got {other:?}"),
×
4911
        }
4912
    }
4913

4914
    // ---------------------------------------------------------------------
4915
    // SQLR-5 — JOINs (INNER / LEFT OUTER / RIGHT OUTER / FULL OUTER)
4916
    // ---------------------------------------------------------------------
4917

4918
    /// Two-table fixture used across the join tests. `customers` has
4919
    /// (1: Alice, 2: Bob, 3: Carol). `orders` has (id, customer_id,
4920
    /// amount): (1, 1, 100), (2, 1, 200), (3, 2, 50), (4, 4, 999).
4921
    /// Customer 3 (Carol) has no orders; order 4 has no customer
4922
    /// (dangling foreign key) — together they exercise both sides of
4923
    /// the outer-join NULL-padding.
4924
    fn seed_join_fixture() -> Database {
1✔
4925
        let mut db = Database::new("t".to_string());
1✔
4926
        for sql in [
3✔
4927
            "CREATE TABLE customers (id INTEGER PRIMARY KEY, name TEXT);",
4928
            "CREATE TABLE orders (id INTEGER PRIMARY KEY, customer_id INTEGER, amount INTEGER);",
4929
            "INSERT INTO customers (name) VALUES ('Alice');",
4930
            "INSERT INTO customers (name) VALUES ('Bob');",
4931
            "INSERT INTO customers (name) VALUES ('Carol');",
4932
            "INSERT INTO orders (customer_id, amount) VALUES (1, 100);",
4933
            "INSERT INTO orders (customer_id, amount) VALUES (1, 200);",
4934
            "INSERT INTO orders (customer_id, amount) VALUES (2, 50);",
4935
            "INSERT INTO orders (customer_id, amount) VALUES (4, 999);",
4936
        ] {
4937
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
4938
        }
4939
        db
1✔
4940
    }
4941

4942
    #[test]
4943
    fn inner_join_returns_only_matched_rows() {
3✔
4944
        let db = seed_join_fixture();
1✔
4945
        let r = run_rows(
4946
            &db,
4947
            "SELECT customers.name, orders.amount FROM customers \
4948
             INNER JOIN orders ON customers.id = orders.customer_id;",
4949
        );
4950
        assert_eq!(r.columns, vec!["name".to_string(), "amount".to_string()]);
2✔
4951
        // Alice: 100, 200; Bob: 50. Carol drops (no orders), order 4 drops
4952
        // (no customer). 3 rows.
4953
        let pairs: Vec<(String, i64)> = r
1✔
4954
            .rows
4955
            .iter()
4956
            .map(|row| {
2✔
4957
                (
4958
                    row[0].to_display_string(),
1✔
4959
                    match row[1] {
2✔
4960
                        Value::Integer(i) => i,
1✔
4961
                        ref v => panic!("expected integer amount, got {v:?}"),
×
4962
                    },
4963
                )
4964
            })
4965
            .collect();
4966
        assert_eq!(pairs.len(), 3);
2✔
4967
        assert!(pairs.contains(&("Alice".to_string(), 100)));
1✔
4968
        assert!(pairs.contains(&("Alice".to_string(), 200)));
1✔
4969
        assert!(pairs.contains(&("Bob".to_string(), 50)));
1✔
4970
    }
4971

4972
    #[test]
4973
    fn bare_join_defaults_to_inner() {
3✔
4974
        let db = seed_join_fixture();
1✔
4975
        let r = run_rows(
4976
            &db,
4977
            "SELECT customers.name FROM customers \
4978
             JOIN orders ON customers.id = orders.customer_id;",
4979
        );
4980
        assert_eq!(r.rows.len(), 3, "JOIN without prefix should be INNER");
2✔
4981
    }
4982

4983
    #[test]
4984
    fn left_outer_join_preserves_unmatched_left() {
3✔
4985
        let db = seed_join_fixture();
1✔
4986
        let r = run_rows(
4987
            &db,
4988
            "SELECT customers.name, orders.amount FROM customers \
4989
             LEFT OUTER JOIN orders ON customers.id = orders.customer_id;",
4990
        );
4991
        // Alice: two rows. Bob: one row. Carol: one NULL-padded row.
4992
        // Order 4 is dropped (left side has no customer for id=4).
4993
        assert_eq!(r.rows.len(), 4);
2✔
4994
        let carol = r
3✔
4995
            .rows
4996
            .iter()
4997
            .find(|row| row[0].to_display_string() == "Carol")
3✔
4998
            .expect("Carol should appear with a NULL-padded right side");
4999
        assert_eq!(carol[1], Value::Null);
1✔
5000
    }
5001

5002
    #[test]
5003
    fn right_outer_join_preserves_unmatched_right() {
3✔
5004
        let db = seed_join_fixture();
1✔
5005
        let r = run_rows(
5006
            &db,
5007
            "SELECT customers.name, orders.amount FROM customers \
5008
             RIGHT OUTER JOIN orders ON customers.id = orders.customer_id;",
5009
        );
5010
        // 3 matched rows + 1 dangling order (id=4, customer_id=4 with no
5011
        // matching customer). Total 4. Carol drops because the right
5012
        // table has no row pointing at her.
5013
        assert_eq!(r.rows.len(), 4);
2✔
5014
        let dangling = r
3✔
5015
            .rows
5016
            .iter()
5017
            .find(|row| matches!(row[1], Value::Integer(999)))
3✔
5018
            .expect("dangling order 999 should appear with a NULL-padded customer name");
5019
        assert_eq!(dangling[0], Value::Null);
1✔
5020
    }
5021

5022
    #[test]
5023
    fn full_outer_join_preserves_both_sides() {
3✔
5024
        let db = seed_join_fixture();
1✔
5025
        let r = run_rows(
5026
            &db,
5027
            "SELECT customers.name, orders.amount FROM customers \
5028
             FULL OUTER JOIN orders ON customers.id = orders.customer_id;",
5029
        );
5030
        // 3 matched + 1 unmatched left (Carol) + 1 unmatched right
5031
        // (order 999) = 5 rows.
5032
        assert_eq!(r.rows.len(), 5);
2✔
5033
        // Carol with NULL amount.
5034
        assert!(
×
5035
            r.rows
3✔
5036
                .iter()
1✔
5037
                .any(|row| row[0].to_display_string() == "Carol" && matches!(row[1], Value::Null))
3✔
5038
        );
5039
        // 999 with NULL name.
5040
        assert!(
×
5041
            r.rows
3✔
5042
                .iter()
1✔
5043
                .any(|row| matches!(row[1], Value::Integer(999)) && matches!(row[0], Value::Null))
3✔
5044
        );
5045
    }
5046

5047
    #[test]
5048
    fn join_with_table_aliases_resolves_qualifiers() {
3✔
5049
        let db = seed_join_fixture();
1✔
5050
        let r = run_rows(
5051
            &db,
5052
            "SELECT c.name, o.amount FROM customers AS c \
5053
             INNER JOIN orders AS o ON c.id = o.customer_id;",
5054
        );
5055
        assert_eq!(r.rows.len(), 3);
2✔
5056
        assert_eq!(r.columns, vec!["name".to_string(), "amount".to_string()]);
1✔
5057
    }
5058

5059
    #[test]
5060
    fn join_with_where_filter_applies_after_join() {
3✔
5061
        let db = seed_join_fixture();
1✔
5062
        // Filter to only orders >= 100. With INNER JOIN, this drops Bob's
5063
        // 50-amount order, leaving Alice's 100 and 200.
5064
        let r = run_rows(
5065
            &db,
5066
            "SELECT customers.name, orders.amount FROM customers \
5067
             INNER JOIN orders ON customers.id = orders.customer_id \
5068
             WHERE orders.amount >= 100;",
5069
        );
5070
        assert_eq!(r.rows.len(), 2);
2✔
5071
        assert!(
×
5072
            r.rows
3✔
5073
                .iter()
1✔
5074
                .all(|row| row[0].to_display_string() == "Alice")
3✔
5075
        );
5076
    }
5077

5078
    #[test]
5079
    fn left_join_with_where_on_right_side_is_not_inner() {
3✔
5080
        // WHERE on the right side that excludes NULL turns LEFT JOIN
5081
        // back into INNER JOIN semantically. Verify the executor
5082
        // applies the WHERE *after* the join padded NULLs in.
5083
        let db = seed_join_fixture();
1✔
5084
        let r = run_rows(
5085
            &db,
5086
            "SELECT customers.name, orders.amount FROM customers \
5087
             LEFT OUTER JOIN orders ON customers.id = orders.customer_id \
5088
             WHERE orders.amount IS NULL;",
5089
        );
5090
        // Only Carol survives — she's the only customer with no order.
5091
        assert_eq!(r.rows.len(), 1);
2✔
5092
        assert_eq!(r.rows[0][0].to_display_string(), "Carol");
1✔
5093
        assert_eq!(r.rows[0][1], Value::Null);
1✔
5094
    }
5095

5096
    #[test]
5097
    fn select_star_over_join_emits_all_columns_from_both_tables() {
3✔
5098
        let db = seed_join_fixture();
1✔
5099
        let r = run_rows(
5100
            &db,
5101
            "SELECT * FROM customers \
5102
             INNER JOIN orders ON customers.id = orders.customer_id;",
5103
        );
5104
        // customers has 2 cols (id, name), orders has 3 cols
5105
        // (id, customer_id, amount). 5 columns total. Header order
5106
        // follows source order — primary table first.
5107
        assert_eq!(
1✔
5108
            r.columns,
5109
            vec![
3✔
5110
                "id".to_string(),
1✔
5111
                "name".to_string(),
1✔
5112
                "id".to_string(),
1✔
5113
                "customer_id".to_string(),
1✔
5114
                "amount".to_string(),
1✔
5115
            ]
5116
        );
5117
        assert_eq!(r.rows.len(), 3);
1✔
5118
    }
5119

5120
    #[test]
5121
    fn join_order_by_sorts_full_joined_rows() {
3✔
5122
        let db = seed_join_fixture();
1✔
5123
        let r = run_rows(
5124
            &db,
5125
            "SELECT c.name, o.amount FROM customers AS c \
5126
             INNER JOIN orders AS o ON c.id = o.customer_id \
5127
             ORDER BY o.amount;",
5128
        );
5129
        let amounts: Vec<i64> = r
1✔
5130
            .rows
5131
            .iter()
5132
            .map(|row| match row[1] {
3✔
5133
                Value::Integer(i) => i,
1✔
5134
                ref v => panic!("expected integer, got {v:?}"),
×
5135
            })
5136
            .collect();
5137
        assert_eq!(amounts, vec![50, 100, 200]);
2✔
5138
    }
5139

5140
    #[test]
5141
    fn join_limit_truncates_after_join_and_sort() {
3✔
5142
        let db = seed_join_fixture();
1✔
5143
        let r = run_rows(
5144
            &db,
5145
            "SELECT c.name, o.amount FROM customers AS c \
5146
             INNER JOIN orders AS o ON c.id = o.customer_id \
5147
             ORDER BY o.amount DESC LIMIT 2;",
5148
        );
5149
        assert_eq!(r.rows.len(), 2);
2✔
5150
        // Top two by amount DESC: 200 (Alice), 100 (Alice).
5151
        let amounts: Vec<i64> = r
1✔
5152
            .rows
5153
            .iter()
5154
            .map(|row| match row[1] {
3✔
5155
                Value::Integer(i) => i,
1✔
5156
                ref v => panic!("expected integer, got {v:?}"),
×
5157
            })
5158
            .collect();
5159
        assert_eq!(amounts, vec![200, 100]);
2✔
5160
    }
5161

5162
    #[test]
5163
    fn three_table_join_chains_correctly() {
3✔
5164
        let mut db = Database::new("t".to_string());
1✔
5165
        for sql in [
3✔
5166
            "CREATE TABLE a (id INTEGER PRIMARY KEY, label TEXT);",
5167
            "CREATE TABLE b (id INTEGER PRIMARY KEY, a_id INTEGER, tag TEXT);",
5168
            "CREATE TABLE c (id INTEGER PRIMARY KEY, b_id INTEGER, note TEXT);",
5169
            "INSERT INTO a (label) VALUES ('a-one');",
5170
            "INSERT INTO a (label) VALUES ('a-two');",
5171
            "INSERT INTO b (a_id, tag) VALUES (1, 'b1');",
5172
            "INSERT INTO b (a_id, tag) VALUES (2, 'b2');",
5173
            "INSERT INTO c (b_id, note) VALUES (1, 'c1');",
5174
        ] {
5175
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
5176
        }
5177
        let r = run_rows(
5178
            &db,
5179
            "SELECT a.label, b.tag, c.note FROM a \
5180
             INNER JOIN b ON a.id = b.a_id \
5181
             INNER JOIN c ON b.id = c.b_id;",
5182
        );
5183
        // Only b1 has a c row. So one combined row.
5184
        assert_eq!(r.rows.len(), 1);
2✔
5185
        assert_eq!(r.rows[0][0].to_display_string(), "a-one");
1✔
5186
        assert_eq!(r.rows[0][1].to_display_string(), "b1");
1✔
5187
        assert_eq!(r.rows[0][2].to_display_string(), "c1");
1✔
5188
    }
5189

5190
    #[test]
5191
    fn ambiguous_unqualified_column_in_join_errors() {
3✔
5192
        // Both customers and orders have a column named `id`. An
5193
        // unqualified `id` in the SELECT must error rather than
5194
        // silently picking one side.
5195
        let db = seed_join_fixture();
1✔
5196
        let q = parse_select(
5197
            "SELECT id FROM customers INNER JOIN orders ON customers.id = orders.customer_id;",
5198
        );
5199
        let res = execute_select_rows(q, &db);
1✔
5200
        assert!(res.is_err(), "unqualified ambiguous 'id' should error");
2✔
5201
    }
5202

5203
    #[test]
5204
    fn join_self_without_alias_is_rejected() {
3✔
5205
        let mut db = Database::new("t".to_string());
1✔
5206
        crate::sql::process_command(
5207
            "CREATE TABLE n (id INTEGER PRIMARY KEY, parent INTEGER);",
5208
            &mut db,
5209
        )
5210
        .unwrap();
5211
        let q = parse_select("SELECT n.id FROM n INNER JOIN n ON n.id = n.parent;");
1✔
5212
        let res = execute_select_rows(q, &db);
1✔
5213
        assert!(
×
5214
            res.is_err(),
2✔
5215
            "self-join without an alias should error on duplicate qualifier"
5216
        );
5217
    }
5218

5219
    // ----- SQLR-5 follow-up: USING / NATURAL / CROSS joins -----
5220

5221
    /// `customers` and `orders` both have an `id` column. Joining on it
5222
    /// via USING must produce exactly the same rows as the equivalent
5223
    /// explicit `ON customers.id = orders.id`.
5224
    #[test]
5225
    fn join_using_matches_same_rows_as_on() {
3✔
5226
        let db = seed_join_fixture();
1✔
5227
        let using = run_rows(
5228
            &db,
5229
            "SELECT customers.name, orders.amount FROM customers \
5230
             INNER JOIN orders USING (id) ORDER BY orders.amount;",
5231
        );
5232
        let on = run_rows(
5233
            &db,
5234
            "SELECT customers.name, orders.amount FROM customers \
5235
             INNER JOIN orders ON customers.id = orders.id ORDER BY orders.amount;",
5236
        );
5237
        // id matches: cust1↔order1 (100), cust2↔order2 (200), cust3↔order3 (50).
5238
        let pairs: Vec<(String, Value)> = using
1✔
5239
            .rows
5240
            .iter()
5241
            .map(|r| (r[0].to_display_string(), r[1].clone()))
3✔
5242
            .collect();
5243
        assert_eq!(pairs.len(), 3);
2✔
5244
        assert_eq!(
1✔
5245
            using.rows, on.rows,
5246
            "USING must mirror the explicit ON rows"
5247
        );
5248
    }
5249

5250
    /// `SELECT *` over a USING join shows the joined-on column once
5251
    /// (SQLite convention), taking the left side's copy.
5252
    #[test]
5253
    fn select_star_using_dedups_joined_column() {
3✔
5254
        let db = seed_join_fixture();
1✔
5255
        let r = run_rows(&db, "SELECT * FROM customers INNER JOIN orders USING (id);");
1✔
5256
        // Without USING dedup this would be 5 columns (id,name,id,
5257
        // customer_id,amount). USING(id) collapses the duplicate `id`
5258
        // to one, leaving 4 in source order.
5259
        assert_eq!(
1✔
5260
            r.columns,
5261
            vec![
3✔
5262
                "id".to_string(),
1✔
5263
                "name".to_string(),
1✔
5264
                "customer_id".to_string(),
1✔
5265
                "amount".to_string(),
1✔
5266
            ]
5267
        );
5268
        assert_eq!(r.rows.len(), 3);
1✔
5269
        // Each surviving row's single `id` equals both sides' id (they
5270
        // were matched on equality), so the left copy is correct.
5271
        for row in &r.rows {
1✔
5272
            assert!(matches!(row[0], Value::Integer(_)));
2✔
5273
        }
5274
    }
5275

5276
    fn seed_natural_fixture() -> Database {
1✔
5277
        let mut db = Database::new("t".to_string());
1✔
5278
        for sql in [
3✔
5279
            // Distinct PK names (lid / rid) so the *only* shared columns
5280
            // are k1 and k2 — NATURAL must match on both with AND.
5281
            "CREATE TABLE l (lid INTEGER PRIMARY KEY, k1 INTEGER, k2 INTEGER, v1 TEXT);",
5282
            "CREATE TABLE r (rid INTEGER PRIMARY KEY, k1 INTEGER, k2 INTEGER, v2 TEXT);",
5283
            "INSERT INTO l (k1, k2, v1) VALUES (1, 1, 'l-a');",
5284
            "INSERT INTO l (k1, k2, v1) VALUES (1, 2, 'l-b');",
5285
            "INSERT INTO l (k1, k2, v1) VALUES (2, 1, 'l-c');",
5286
            "INSERT INTO r (k1, k2, v2) VALUES (1, 1, 'r-a');",
5287
            "INSERT INTO r (k1, k2, v2) VALUES (1, 2, 'r-b');",
5288
            "INSERT INTO r (k1, k2, v2) VALUES (9, 9, 'r-z');",
5289
        ] {
5290
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
5291
        }
5292
        db
1✔
5293
    }
5294

5295
    /// NATURAL JOIN auto-discovers the shared columns (k1, k2) and
5296
    /// matches on both with AND.
5297
    #[test]
5298
    fn natural_join_matches_on_all_shared_columns() {
3✔
5299
        let db = seed_natural_fixture();
1✔
5300
        let natural = run_rows(&db, "SELECT v1, v2 FROM l NATURAL JOIN r ORDER BY v1;");
1✔
5301
        // (1,1)->l-a/r-a and (1,2)->l-b/r-b match. (2,1) and (9,9) don't.
5302
        let pairs: Vec<(String, String)> = natural
1✔
5303
            .rows
5304
            .iter()
5305
            .map(|r| (r[0].to_display_string(), r[1].to_display_string()))
3✔
5306
            .collect();
5307
        assert_eq!(
1✔
5308
            pairs,
5309
            vec![
3✔
5310
                ("l-a".to_string(), "r-a".to_string()),
2✔
5311
                ("l-b".to_string(), "r-b".to_string()),
2✔
5312
            ]
5313
        );
5314
        // Equivalent explicit form yields the same rows.
5315
        let explicit = run_rows(
5316
            &db,
5317
            "SELECT v1, v2 FROM l INNER JOIN r ON l.k1 = r.k1 AND l.k2 = r.k2 ORDER BY v1;",
5318
        );
5319
        assert_eq!(natural.rows, explicit.rows);
2✔
5320
    }
5321

5322
    /// `SELECT *` over a NATURAL join shows each shared column once.
5323
    #[test]
5324
    fn select_star_natural_dedups_shared_columns() {
3✔
5325
        let db = seed_natural_fixture();
1✔
5326
        let r = run_rows(&db, "SELECT * FROM l NATURAL JOIN r;");
1✔
5327
        // Source order with k1,k2 taken from the left only:
5328
        // l: lid, k1, k2, v1 ; r: rid, v2  (k1,k2 dropped from r).
5329
        assert_eq!(
1✔
5330
            r.columns,
5331
            vec![
3✔
5332
                "lid".to_string(),
1✔
5333
                "k1".to_string(),
1✔
5334
                "k2".to_string(),
1✔
5335
                "v1".to_string(),
1✔
5336
                "rid".to_string(),
1✔
5337
                "v2".to_string(),
1✔
5338
            ]
5339
        );
5340
        assert_eq!(r.rows.len(), 2);
1✔
5341
    }
5342

5343
    /// NATURAL JOIN between tables with no shared column names degrades
5344
    /// to a cross product, matching SQLite.
5345
    #[test]
5346
    fn natural_join_without_common_columns_is_cross_product() {
3✔
5347
        let mut db = Database::new("t".to_string());
1✔
5348
        for sql in [
3✔
5349
            "CREATE TABLE p (pid INTEGER PRIMARY KEY, pa TEXT);",
5350
            "CREATE TABLE q (qid INTEGER PRIMARY KEY, qb TEXT);",
5351
            "INSERT INTO p (pa) VALUES ('p1');",
5352
            "INSERT INTO p (pa) VALUES ('p2');",
5353
            "INSERT INTO q (qb) VALUES ('q1');",
5354
            "INSERT INTO q (qb) VALUES ('q2');",
5355
            "INSERT INTO q (qb) VALUES ('q3');",
5356
        ] {
5357
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
5358
        }
5359
        let r = run_rows(&db, "SELECT p.pa, q.qb FROM p NATURAL JOIN q;");
1✔
5360
        assert_eq!(r.rows.len(), 2 * 3, "no shared columns ⇒ cross product");
2✔
5361
    }
5362

5363
    /// CROSS JOIN produces the full cartesian product and is equivalent
5364
    /// to `INNER JOIN ... ON 1`.
5365
    #[test]
5366
    fn cross_join_produces_cartesian_product() {
3✔
5367
        let db = seed_join_fixture();
1✔
5368
        let cross = run_rows(
5369
            &db,
5370
            "SELECT customers.name, orders.amount FROM customers CROSS JOIN orders;",
5371
        );
5372
        // 3 customers × 4 orders = 12 rows.
5373
        assert_eq!(cross.rows.len(), 12);
2✔
5374
        let on_true = run_rows(
5375
            &db,
5376
            "SELECT customers.name, orders.amount FROM customers INNER JOIN orders ON 1;",
5377
        );
5378
        assert_eq!(cross.rows.len(), on_true.rows.len());
2✔
5379
        // SELECT * over a cross join keeps every column from both sides.
5380
        let star = run_rows(&db, "SELECT * FROM customers CROSS JOIN orders;");
1✔
5381
        assert_eq!(star.columns.len(), 5);
2✔
5382
        assert_eq!(star.rows.len(), 12);
1✔
5383
    }
5384

5385
    /// A LEFT OUTER join expressed with USING still preserves unmatched
5386
    /// left rows (NULL-padding the right), and the deduplicated column
5387
    /// keeps the left side's value.
5388
    #[test]
5389
    fn left_outer_join_using_preserves_unmatched_left() {
3✔
5390
        let db = seed_join_fixture();
1✔
5391
        let r = run_rows(
5392
            &db,
5393
            "SELECT * FROM customers LEFT OUTER JOIN orders USING (id);",
5394
        );
5395
        // customers ids 1,2,3 each match an order id; none are unmatched
5396
        // here, so confirm the dedup + row count instead. 4 columns,
5397
        // 3 matched rows (orders has no id=customer beyond 1..3 overlap).
5398
        assert_eq!(r.columns.len(), 4, "id is shown once");
2✔
5399
        assert_eq!(r.rows.len(), 3);
2✔
5400
    }
5401

5402
    /// USING a column that doesn't exist on one of the sides is a clean
5403
    /// error, not a silent empty result.
5404
    #[test]
5405
    fn using_unknown_column_errors() {
3✔
5406
        let db = seed_join_fixture();
1✔
5407
        let q = parse_select("SELECT * FROM customers INNER JOIN orders USING (nope);");
1✔
5408
        let res = execute_select_rows(q, &db);
1✔
5409
        assert!(res.is_err(), "USING (nope) must error — column absent");
2✔
5410
    }
5411

5412
    // ---------------------------------------------------------------------
5413
    // SQLR-6 — aggregates / GROUP BY / DISTINCT over JOIN results
5414
    // ---------------------------------------------------------------------
5415

5416
    #[test]
5417
    fn group_by_with_aggregates_over_inner_join() {
3✔
5418
        let db = seed_join_fixture();
1✔
5419
        let r = run_rows(
5420
            &db,
5421
            "SELECT customers.name, COUNT(*), SUM(orders.amount) FROM customers \
5422
             INNER JOIN orders ON customers.id = orders.customer_id \
5423
             GROUP BY customers.name ORDER BY customers.name;",
5424
        );
5425
        assert_eq!(r.columns, vec!["name", "COUNT(*)", "SUM(orders.amount)"]);
2✔
5426
        assert_eq!(r.rows.len(), 2);
1✔
5427
        assert_eq!(r.rows[0][0].to_display_string(), "Alice");
1✔
5428
        assert_eq!(expect_int(&r.rows[0][1]), 2);
1✔
5429
        assert_eq!(expect_int(&r.rows[0][2]), 300);
1✔
5430
        assert_eq!(r.rows[1][0].to_display_string(), "Bob");
1✔
5431
        assert_eq!(expect_int(&r.rows[1][1]), 1);
1✔
5432
        assert_eq!(expect_int(&r.rows[1][2]), 50);
1✔
5433
    }
5434

5435
    #[test]
5436
    fn aggregates_over_join_without_group_by() {
3✔
5437
        let db = seed_join_fixture();
1✔
5438
        let r = run_rows(
5439
            &db,
5440
            "SELECT COUNT(*), SUM(orders.amount) FROM customers \
5441
             INNER JOIN orders ON customers.id = orders.customer_id;",
5442
        );
5443
        assert_eq!(r.rows.len(), 1);
2✔
5444
        assert_eq!(expect_int(&r.rows[0][0]), 3);
1✔
5445
        assert_eq!(expect_int(&r.rows[0][1]), 350);
1✔
5446
    }
5447

5448
    #[test]
5449
    fn count_column_skips_outer_join_null_padding() {
3✔
5450
        // Carol has no orders: her LEFT-JOIN row is NULL-padded on the
5451
        // right. COUNT(*) counts the padded row; COUNT(orders.id) skips
5452
        // its NULL, per the usual NULL-skipping aggregate semantics.
5453
        let db = seed_join_fixture();
1✔
5454
        let r = run_rows(
5455
            &db,
5456
            "SELECT customers.name, COUNT(*), COUNT(orders.id) FROM customers \
5457
             LEFT OUTER JOIN orders ON customers.id = orders.customer_id \
5458
             GROUP BY customers.name ORDER BY customers.name;",
5459
        );
5460
        assert_eq!(r.rows.len(), 3);
2✔
5461
        let carol = &r.rows[2];
1✔
5462
        assert_eq!(carol[0].to_display_string(), "Carol");
1✔
5463
        assert_eq!(expect_int(&carol[1]), 1, "COUNT(*) counts the padded row");
1✔
5464
        assert_eq!(expect_int(&carol[2]), 0, "COUNT(col) skips the NULL");
2✔
5465
    }
5466

5467
    #[test]
5468
    fn outer_join_null_keys_group_together() {
3✔
5469
        // FULL OUTER surfaces the dangling order (customer_id 4) with a
5470
        // NULL customers.name — it must form its own group, not vanish.
5471
        let db = seed_join_fixture();
1✔
5472
        let r = run_rows(
5473
            &db,
5474
            "SELECT customers.name, COUNT(*) FROM customers \
5475
             FULL OUTER JOIN orders ON customers.id = orders.customer_id \
5476
             GROUP BY customers.name;",
5477
        );
5478
        assert_eq!(r.rows.len(), 4, "Alice, Bob, Carol, NULL");
2✔
5479
        let null_group = r
3✔
5480
            .rows
5481
            .iter()
5482
            .find(|row| row[0] == Value::Null)
3✔
5483
            .expect("dangling order groups under NULL");
5484
        assert_eq!(expect_int(&null_group[1]), 1);
1✔
5485
    }
5486

5487
    #[test]
5488
    fn count_distinct_over_join() {
3✔
5489
        let db = seed_join_fixture();
1✔
5490
        let r = run_rows(
5491
            &db,
5492
            "SELECT COUNT(DISTINCT customers.name) FROM customers \
5493
             INNER JOIN orders ON customers.id = orders.customer_id;",
5494
        );
5495
        assert_eq!(expect_int(&r.rows[0][0]), 2);
2✔
5496
    }
5497

5498
    #[test]
5499
    fn group_by_qualified_key_resolves_ambiguous_name() {
3✔
5500
        // `id` exists on both tables — the qualified GROUP BY key picks
5501
        // the customers side.
5502
        let db = seed_join_fixture();
1✔
5503
        let r = run_rows(
5504
            &db,
5505
            "SELECT customers.id, COUNT(*) FROM customers \
5506
             INNER JOIN orders ON customers.id = orders.customer_id \
5507
             GROUP BY customers.id ORDER BY customers.id;",
5508
        );
5509
        assert_eq!(r.rows.len(), 2);
2✔
5510
        assert_eq!(expect_int(&r.rows[0][0]), 1);
1✔
5511
        assert_eq!(expect_int(&r.rows[0][1]), 2);
1✔
5512
    }
5513

5514
    #[test]
5515
    fn group_by_ambiguous_unqualified_key_over_join_errors() {
3✔
5516
        let err = crate::sql::process_command(
5517
            "SELECT COUNT(*) FROM customers \
5518
             INNER JOIN orders ON customers.id = orders.customer_id GROUP BY id;",
5519
            &mut seed_join_fixture(),
1✔
5520
        );
5521
        match err {
1✔
5522
            Err(e) => assert!(
1✔
5523
                e.to_string().contains("ambiguous"),
3✔
5524
                "unexpected message: {e}"
5525
            ),
5526
            Ok(_) => panic!("ambiguous GROUP BY key must error"),
×
5527
        }
5528
    }
5529

5530
    #[test]
5531
    fn bare_column_not_in_group_by_over_join_errors() {
3✔
5532
        let err = crate::sql::process_command(
5533
            "SELECT orders.amount, COUNT(*) FROM customers \
5534
             INNER JOIN orders ON customers.id = orders.customer_id \
5535
             GROUP BY customers.name;",
5536
            &mut seed_join_fixture(),
1✔
5537
        );
5538
        match err {
1✔
5539
            Err(e) => assert!(
1✔
5540
                e.to_string().contains("must appear in GROUP BY"),
3✔
5541
                "unexpected message: {e}"
5542
            ),
5543
            Ok(_) => panic!("bare column outside GROUP BY must error"),
×
5544
        }
5545
    }
5546

5547
    #[test]
5548
    fn aggregate_in_where_over_join_errors_cleanly() {
3✔
5549
        // Code-review gap from SQLR-5: aggregate misuse inside WHERE on
5550
        // a joined query must be a typed error, not wrong results.
5551
        let err = crate::sql::process_command(
5552
            "SELECT COUNT(*) FROM customers \
5553
             INNER JOIN orders ON customers.id = orders.customer_id \
5554
             WHERE COUNT(*) > 1;",
5555
            &mut seed_join_fixture(),
1✔
5556
        );
5557
        match err {
1✔
5558
            Err(SQLRiteError::NotImplemented(msg)) => assert!(
1✔
5559
                msg.contains("not allowed in WHERE"),
2✔
5560
                "unexpected message: {msg}"
5561
            ),
5562
            other => panic!("expected NotImplemented, got {other:?}"),
×
5563
        }
5564
    }
5565

5566
    #[test]
5567
    fn order_by_aggregate_over_join() {
3✔
5568
        let db = seed_join_fixture();
1✔
5569
        let r = run_rows(
5570
            &db,
5571
            "SELECT customers.name, SUM(orders.amount) FROM customers \
5572
             INNER JOIN orders ON customers.id = orders.customer_id \
5573
             GROUP BY customers.name ORDER BY SUM(orders.amount) DESC;",
5574
        );
5575
        assert_eq!(r.rows[0][0].to_display_string(), "Alice");
2✔
5576
        // Qualifier-stripped fallback: ORDER BY SUM(amount) finds the
5577
        // SUM(orders.amount) slot even though the spellings differ.
5578
        let r2 = run_rows(
5579
            &db,
5580
            "SELECT customers.name, SUM(orders.amount) FROM customers \
5581
             INNER JOIN orders ON customers.id = orders.customer_id \
5582
             GROUP BY customers.name ORDER BY SUM(amount) DESC;",
5583
        );
5584
        assert_eq!(r2.rows[0][0].to_display_string(), "Alice");
2✔
5585
    }
5586

5587
    #[test]
5588
    fn distinct_over_join_dedupes_output_rows() {
3✔
5589
        let db = seed_join_fixture();
1✔
5590
        let r = run_rows(
5591
            &db,
5592
            "SELECT DISTINCT customers.name FROM customers \
5593
             INNER JOIN orders ON customers.id = orders.customer_id;",
5594
        );
5595
        assert_eq!(r.rows.len(), 2);
2✔
5596
        let names: Vec<String> = r
1✔
5597
            .rows
5598
            .iter()
5599
            .map(|row| row[0].to_display_string())
3✔
5600
            .collect();
5601
        assert_eq!(names, vec!["Alice".to_string(), "Bob".to_string()]);
2✔
5602
    }
5603

5604
    #[test]
5605
    fn distinct_over_join_defers_limit_past_dedupe() {
3✔
5606
        // Without deferral, LIMIT 2 would truncate the joined rows to
5607
        // Alice's two orders and dedupe to a single row.
5608
        let db = seed_join_fixture();
1✔
5609
        let r = run_rows(
5610
            &db,
5611
            "SELECT DISTINCT customers.name FROM customers \
5612
             INNER JOIN orders ON customers.id = orders.customer_id LIMIT 2;",
5613
        );
5614
        assert_eq!(r.rows.len(), 2, "LIMIT applies after DISTINCT collapses");
2✔
5615
    }
5616

5617
    #[test]
5618
    fn select_star_group_by_errors_instead_of_panicking() {
3✔
5619
        // Single-table regression: the parser's "must appear in GROUP BY"
5620
        // check skips `SELECT *`, so the executor used to hit an
5621
        // `expect()` panic when a non-grouped column reached projection.
5622
        let err = crate::sql::process_command(
5623
            "SELECT * FROM orders GROUP BY customer_id;",
5624
            &mut seed_join_fixture(),
1✔
5625
        );
5626
        match err {
1✔
5627
            Err(e) => assert!(
1✔
5628
                e.to_string().contains("must appear in GROUP BY"),
3✔
5629
                "unexpected message: {e}"
5630
            ),
5631
            Ok(_) => panic!("SELECT * with GROUP BY must error, not panic"),
×
5632
        }
5633
    }
5634

5635
    #[test]
5636
    fn group_by_qualified_key_single_table_still_works() {
3✔
5637
        // Qualified GROUP BY keys are accepted on the single-table path
5638
        // too (qualifier ignored, same posture as projections).
5639
        let db = seed_employees();
1✔
5640
        let r = run_rows(
5641
            &db,
5642
            "SELECT dept, COUNT(*) FROM emp GROUP BY emp.dept ORDER BY dept;",
5643
        );
5644
        assert_eq!(r.rows.len(), 3, "eng / sales / ops");
2✔
5645
    }
5646

5647
    #[test]
5648
    fn left_join_with_no_matches_pads_every_row() {
3✔
5649
        let mut db = Database::new("t".to_string());
1✔
5650
        for sql in [
3✔
5651
            "CREATE TABLE a (id INTEGER PRIMARY KEY, x INTEGER);",
5652
            "CREATE TABLE b (id INTEGER PRIMARY KEY, y INTEGER);",
5653
            "INSERT INTO a (x) VALUES (1);",
5654
            "INSERT INTO a (x) VALUES (2);",
5655
            "INSERT INTO b (y) VALUES (10);",
5656
        ] {
5657
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
5658
        }
5659
        // ON condition matches nothing.
5660
        let r = run_rows(
5661
            &db,
5662
            "SELECT a.x, b.y FROM a LEFT OUTER JOIN b ON a.x = b.y;",
5663
        );
5664
        assert_eq!(r.rows.len(), 2);
2✔
5665
        for row in &r.rows {
1✔
5666
            assert_eq!(row[1], Value::Null);
2✔
5667
        }
5668
    }
5669

5670
    #[test]
5671
    fn left_outer_join_order_by_places_nulls_first() {
3✔
5672
        // NULL ordering matches the engine-wide rule: NULL is Less
5673
        // than every concrete value (see compare_values). So an
5674
        // ORDER BY of a NULL-padded right column puts the
5675
        // outer-join row at the top under ASC.
5676
        let db = seed_join_fixture();
1✔
5677
        let r = run_rows(
5678
            &db,
5679
            "SELECT c.name, o.amount FROM customers AS c \
5680
             LEFT OUTER JOIN orders AS o ON c.id = o.customer_id \
5681
             ORDER BY o.amount ASC;",
5682
        );
5683
        assert_eq!(r.rows.len(), 4);
2✔
5684
        // Carol's NULL amount sorts first.
5685
        assert_eq!(r.rows[0][0].to_display_string(), "Carol");
1✔
5686
        assert_eq!(r.rows[0][1], Value::Null);
1✔
5687
    }
5688

5689
    #[test]
5690
    fn chained_left_outer_join_preserves_left_through_two_levels() {
3✔
5691
        // A LEFT JOIN B LEFT JOIN C — a row in A with no match in B
5692
        // must survive both joins with NULL padding for both sides.
5693
        let mut db = Database::new("t".to_string());
1✔
5694
        for sql in [
3✔
5695
            "CREATE TABLE a (id INTEGER PRIMARY KEY, label TEXT);",
5696
            "CREATE TABLE b (id INTEGER PRIMARY KEY, a_id INTEGER, tag TEXT);",
5697
            "CREATE TABLE c (id INTEGER PRIMARY KEY, b_id INTEGER, note TEXT);",
5698
            "INSERT INTO a (label) VALUES ('a-one');",
5699
            "INSERT INTO a (label) VALUES ('a-two');",
5700
            // b only matches a-one.
5701
            "INSERT INTO b (a_id, tag) VALUES (1, 'b1');",
5702
            // No c rows at all.
5703
        ] {
5704
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
5705
        }
5706
        let r = run_rows(
5707
            &db,
5708
            "SELECT a.label, b.tag, c.note FROM a \
5709
             LEFT OUTER JOIN b ON a.id = b.a_id \
5710
             LEFT OUTER JOIN c ON b.id = c.b_id;",
5711
        );
5712
        // Two rows: a-one + b1 with c=NULL, and a-two with b=NULL+c=NULL.
5713
        assert_eq!(r.rows.len(), 2);
2✔
5714
        let by_label: std::collections::HashMap<String, &Vec<Value>> = r
1✔
5715
            .rows
5716
            .iter()
5717
            .map(|row| (row[0].to_display_string(), row))
3✔
5718
            .collect();
5719
        assert_eq!(by_label["a-one"][1].to_display_string(), "b1");
2✔
5720
        assert_eq!(by_label["a-one"][2], Value::Null);
1✔
5721
        assert_eq!(by_label["a-two"][1], Value::Null);
1✔
5722
        assert_eq!(by_label["a-two"][2], Value::Null);
1✔
5723
    }
5724

5725
    #[test]
5726
    fn on_clause_referencing_not_yet_joined_table_errors_clearly() {
3✔
5727
        // ON should only see tables joined so far. Referencing a
5728
        // table that hasn't joined yet is a clean error rather than
5729
        // silently NULL-coalescing into "ON evaluated false".
5730
        let mut db = Database::new("t".to_string());
1✔
5731
        for sql in [
3✔
5732
            "CREATE TABLE a (id INTEGER PRIMARY KEY, x INTEGER);",
5733
            "CREATE TABLE b (id INTEGER PRIMARY KEY, x INTEGER);",
5734
            "CREATE TABLE c (id INTEGER PRIMARY KEY, x INTEGER);",
5735
            "INSERT INTO a (x) VALUES (1);",
5736
            "INSERT INTO b (x) VALUES (1);",
5737
            "INSERT INTO c (x) VALUES (1);",
5738
        ] {
5739
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
5740
        }
5741
        let q =
5742
            parse_select("SELECT a.x FROM a INNER JOIN b ON a.x = c.x INNER JOIN c ON b.x = c.x;");
5743
        let res = execute_select_rows(q, &db);
1✔
5744
        assert!(
×
5745
            res.is_err(),
2✔
5746
            "ON referencing not-yet-joined table 'c' should error"
5747
        );
5748
    }
5749

5750
    #[test]
5751
    fn join_on_truthy_integer_is_accepted() {
3✔
5752
        // ON `1` should be treated as true, like WHERE 1. Verifies
5753
        // the executor reuses eval_predicate_scope's truthiness
5754
        // semantic on JOIN conditions.
5755
        let mut db = Database::new("t".to_string());
1✔
5756
        for sql in [
3✔
5757
            "CREATE TABLE a (id INTEGER PRIMARY KEY, x INTEGER);",
5758
            "CREATE TABLE b (id INTEGER PRIMARY KEY, y INTEGER);",
5759
            "INSERT INTO a (x) VALUES (1);",
5760
            "INSERT INTO a (x) VALUES (2);",
5761
            "INSERT INTO b (y) VALUES (10);",
5762
            "INSERT INTO b (y) VALUES (20);",
5763
        ] {
5764
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
5765
        }
5766
        let r = run_rows(&db, "SELECT a.x, b.y FROM a INNER JOIN b ON 1;");
1✔
5767
        // ON 1 is always true → cross product → 2 × 2 = 4 rows.
5768
        assert_eq!(r.rows.len(), 4);
2✔
5769
    }
5770

5771
    #[test]
5772
    fn full_join_on_empty_tables_returns_empty() {
3✔
5773
        let mut db = Database::new("t".to_string());
1✔
5774
        for sql in [
3✔
5775
            "CREATE TABLE a (id INTEGER PRIMARY KEY, x INTEGER);",
5776
            "CREATE TABLE b (id INTEGER PRIMARY KEY, y INTEGER);",
5777
        ] {
5778
            crate::sql::process_command(sql, &mut db).unwrap();
2✔
5779
        }
5780
        let r = run_rows(
5781
            &db,
5782
            "SELECT a.x, b.y FROM a FULL OUTER JOIN b ON a.x = b.y;",
5783
        );
5784
        assert!(r.rows.is_empty());
2✔
5785
    }
5786
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc