• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

anthonypdawson / vector-inspector / 25969302440

16 May 2026 06:15PM UTC coverage: 80.687% (+0.2%) from 80.484%
25969302440

push

github

web-flow
Installation progressive enhancement (#32)

* Refactor dependencies in pyproject.toml: categorize optional dependencies into groups, add convenience bundles, and clean up core dependencies for improved clarity and maintainability.

* Enhance installation process and provider management

- Updated README.md to include installation options for core, recommended, and all providers, improving user guidance.
- Introduced lazy loading for connection classes in __init__.py to prevent import errors for uninstalled providers.
- Added provider detection and installation helpers in provider_detection.py to streamline provider management.
- Refactored connection handling in provider_factory.py and connection_view.py to utilize lazy imports and improve user experience.
- Updated info_panel.py to use provider_type for connection details, enhancing maintainability and clarity.

* Make run.sh executable

* feat: add provider installation dialog and enhance settings dialog

- Implemented a new dialog for installing missing database provider packages, allowing users to view installation instructions and install providers directly within the app.
- Enhanced the settings dialog to include tabs for managing optional feature groups and database providers, with background checks for availability and uninstall options.
- Added background threads for uninstalling features and providers, improving user experience during package management.
- Updated connection view to handle provider installation prompts and refresh provider lists after installations.
- Introduced lazy loading for feature dependencies, raising structured errors to guide users in installing required packages.

Co-authored-by: Copilot <copilot@github.com>

* Add tests for ProviderInstallDialog and enhance SettingsDialog feature handling

- Introduced comprehensive tests for the ProviderInstallDialog, covering instantiation, UI state, installation success and failure paths, and feature compatibility.
- Enhanced S... (continued)

891 of 1050 new or added lines in 17 files covered. (84.86%)

6 existing lines in 2 files now uncovered.

15149 of 18775 relevant lines covered (80.69%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.69
/src/vector_inspector/ui/views/visualization_view.py
1
"""Vector visualization view with dimensionality reduction (modular panels)."""
2

3
from __future__ import annotations
1✔
4

5
import tempfile
1✔
6
import time
1✔
7
import webbrowser
1✔
8
from datetime import UTC
1✔
9
from typing import Any, Optional
1✔
10

11
import numpy as np
1✔
12
from PySide6.QtCore import QThread, Signal
1✔
13
from PySide6.QtWidgets import (
1✔
14
    QCheckBox,
15
    QHBoxLayout,
16
    QLabel,
17
    QMessageBox,
18
    QSpinBox,
19
    QTabWidget,
20
    QVBoxLayout,
21
    QWidget,
22
)
23

24
from vector_inspector.core.connection_manager import ConnectionInstance
1✔
25

26
# Feature flags now accessed via app_state.advanced_features_enabled
27
from vector_inspector.core.logging import log_error, log_info
1✔
28
from vector_inspector.services import ClusterRunner, ThreadedTaskRunner
1✔
29
from vector_inspector.services.visualization_service import VisualizationService
1✔
30
from vector_inspector.state import AppState
1✔
31
from vector_inspector.ui.components.loading_dialog import LoadingDialog
1✔
32
from vector_inspector.ui.styles import (
1✔
33
    TAB_FONT_SIZE,
34
    TAB_FONT_WEIGHT,
35
    TAB_PADDING,
36
)
37
from vector_inspector.ui.views.visualization import ClusteringPanel, DRPanel, HistogramPanel, PlotPanel
1✔
38
from vector_inspector.utils.lazy_imports import FeatureDependencyMissingError
1✔
39

40

41
class VisualizationThread(QThread):
    """Worker thread that runs dimensionality reduction off the UI thread.

    Signals:
        finished: Emitted with the reduced array when the reduction succeeds.
        error: Emitted with a message when the reduction fails.
        feature_missing: Emitted with the feature id of a missing dependency.
    """

    finished = Signal(np.ndarray)
    error = Signal(str)
    feature_missing = Signal(str)  # emits feature_id when a dep is not installed

    def __init__(self, embeddings, method, n_components):
        """Remember the reduction inputs; the work itself happens in ``run``."""
        super().__init__()
        self.embeddings = embeddings
        self.method = method
        self.n_components = n_components

    def run(self):
        """Reduce the stored embeddings and report the outcome through signals."""
        try:
            reduced = VisualizationService.reduce_dimensions(
                self.embeddings,
                method=self.method,
                n_components=self.n_components,
            )
            # The service returning None is reported as a plain failure.
            if reduced is None:
                self.error.emit("Dimensionality reduction failed")
            else:
                self.finished.emit(reduced)
        except FeatureDependencyMissingError as missing:
            # Reported separately so the view can offer to install the feature.
            self.feature_missing.emit(missing.feature_id)
        except Exception as failure:
            log_error("Dimensionality reduction failed: %s", failure, exc_info=True)
            self.error.emit(str(failure))
1✔
69

70

71
class ClusteringThread(QThread):
    """Worker thread that runs a clustering algorithm off the UI thread.

    Signals:
        finished: Emitted with a ``(labels, algorithm)`` tuple on success.
        error: Emitted with a message when clustering raises.
    """

    finished = Signal(object)  # cluster_labels, algorithm
    error = Signal(str)

    def __init__(self, embeddings, algorithm, params):
        """Remember the clustering inputs; the work itself happens in ``run``."""
        super().__init__()
        self.embeddings = embeddings
        self.algorithm = algorithm
        self.params = params

    def run(self):
        """Cluster the stored embeddings and report the outcome through signals."""
        try:
            # Imported here rather than at module level, as in the original.
            from vector_inspector.core.clustering import run_clustering

            outcome = run_clustering(self.embeddings, self.algorithm, self.params)
            labels, algorithm = outcome
            self.finished.emit((labels, algorithm))
        except Exception as failure:
            log_error("Clustering failed: %s", failure, exc_info=True)
            self.error.emit(str(failure))
1✔
93

94

95
class VisualizationDataLoadThread(QThread):
    """Worker thread that fetches collection items for visualization.

    Signals:
        finished: Emitted with the loaded data dict on success.
        error: Emitted with a message when nothing could be loaded.
    """

    finished = Signal(dict)  # data
    error = Signal(str)

    def __init__(self, connection, collection, sample_size, parent=None):
        """Remember what to load; ``sample_size=None`` means no limit is passed."""
        super().__init__(parent)
        self.connection = connection
        self.collection = collection
        self.sample_size = sample_size

    def run(self):
        """Fetch the items and emit either ``finished`` or ``error``."""
        try:
            if not self.connection:
                self.error.emit("No database connection available")
                return

            # Only pass ``limit`` when a sample size was requested.
            fetch_kwargs = {} if self.sample_size is None else {"limit": self.sample_size}
            loaded = self.connection.get_all_items(self.collection, **fetch_kwargs)

            if not loaded:
                self.error.emit("Failed to load data")
                return
            self.finished.emit(loaded)
        except Exception as failure:
            log_error("Visualization data load failed: %s", failure, exc_info=True)
            self.error.emit(str(failure))
1✔
126

127

128
class VisualizationView(QWidget):
1✔
129
    """View for visualizing vectors in 2D/3D using modular panels."""
130

131
    # Signal emitted when user wants to view a point in data browser
132
    view_in_data_browser_requested = Signal(str)  # item_id
1✔
133

134
    app_state: AppState
1✔
135
    task_runner: ThreadedTaskRunner
1✔
136
    cluster_runner: ClusterRunner
1✔
137

138
    def __init__(
        self,
        app_state: AppState,
        task_runner: ThreadedTaskRunner,
        connection_manager=None,
        parent=None,
    ):
        """Build the view, wire its signals and sync with the current provider.

        Args:
            app_state: Shared application state; supplies the provider and signals.
            task_runner: Task runner stored on the view.
            connection_manager: Optional manager handed to the histogram panel.
            parent: Optional parent widget.
        """
        super().__init__(parent)

        # Injected collaborators
        self.app_state = app_state
        self.task_runner = task_runner
        self._connection_manager = connection_manager
        self.cluster_runner = ClusterRunner()
        self.connection = self.app_state.provider

        # Data produced by the most recent load / reduction / clustering
        self.current_collection = ""
        self.current_data = None
        self.reduced_data = None
        self.cluster_labels = None

        # Background workers (created on demand)
        self.visualization_thread = None
        self.data_load_thread = None
        self.clustering_thread = None

        # Temp HTML exports used by "open in browser"
        self.temp_html_files = []
        self._last_temp_html = None

        self.loading_dialog = LoadingDialog("Loading visualization...", self)

        # Start timestamps used for elapsed-time status reporting
        self._dr_start_time: float = 0.0
        self._cluster_start_time: float = 0.0

        self._setup_ui()
        self._connect_plot_signals()
        self._connect_state_signals()

        # Sync with a provider that was already set before this view existed.
        if self.app_state.provider:
            self._on_provider_changed(self.app_state.provider)
1✔
175

176
    def _connect_state_signals(self) -> None:
        """Hook this view's handlers up to the AppState signals."""
        state = self.app_state

        # Connection and collection selection
        state.provider_changed.connect(self._on_provider_changed)
        state.collection_changed.connect(self._on_collection_changed)

        # Global loading indicator
        state.loading_started.connect(self._on_loading_started)
        state.loading_finished.connect(self._on_loading_finished)

        # Error reporting
        state.error_occurred.connect(self._on_error)
1✔
190

191
    def _on_provider_changed(self, connection: Optional[ConnectionInstance]) -> None:
        """Adopt the new connection and reset collection-dependent controls."""
        self.connection = connection
        self.histogram_panel.set_connection(connection)
        # A new connection means the collection is unknown again, so the
        # action buttons stay disabled until one is selected.
        self.set_collection_ready(False)
1✔
198

199
    def _on_collection_changed(self, collection: str) -> None:
        """Switch to the given collection, or disable actions when it is empty."""
        if not collection:
            self.set_collection_ready(False)
            return
        self.set_collection(collection)
1✔
205

206
    def _on_loading_started(self, message: str) -> None:
        """Show the loading dialog with the message supplied by AppState."""
        dialog = self.loading_dialog
        dialog.show_loading(message)
1✔
209

210
    def _on_loading_finished(self) -> None:
        """Dismiss the loading dialog once AppState reports loading is done."""
        # Use hide_loading() as the counterpart of the show_loading() call in
        # _on_loading_started, matching every other handler in this view.
        self.loading_dialog.hide_loading()
1✔
213

214
    def _on_error(self, title: str, message: str) -> None:
        """Surface an AppState error as a modal critical message box."""
        QMessageBox.critical(
            self,
            title,
            message,
        )
1✔
217

218
    def _connect_plot_signals(self):
        """Route the plot panel's "view in data browser" signal to this view."""
        request_signal = self.plot_panel.view_in_data_browser
        request_signal.connect(self._on_view_in_data_browser)
1✔
221

222
    def _setup_ui(self):
        """Create the shared controls, the modular panels, the tabs and the status label.

        The sample-size spin box and the "Use all data" checkbox are limited
        when ``app_state.advanced_features_enabled`` is false. Action buttons
        start disabled until a collection is selected.
        """
        layout = QVBoxLayout(self)
        advanced = self.app_state.advanced_features_enabled

        # Shared controls (sample size + use all data)
        shared_layout = QHBoxLayout()
        shared_layout.addWidget(QLabel("Sample size:"))
        self.sample_spin = QSpinBox()
        self.sample_spin.setMinimum(10)
        # Feature gating: limit sample size in free version
        self.sample_spin.setMaximum(10000 if advanced else 500)
        self.sample_spin.setValue(500)
        self.sample_spin.setSingleStep(100)
        shared_layout.addWidget(self.sample_spin)
        self.use_all_checkbox = QCheckBox("Use all data")
        shared_layout.addWidget(self.use_all_checkbox)
        shared_layout.addStretch()
        layout.addLayout(shared_layout)

        # Feature gating: disable "Use all data" in free version
        if not advanced:
            self.use_all_checkbox.setEnabled(False)
            self.use_all_checkbox.setToolTip(self.app_state.get_feature_tooltip())

        def on_use_all_changed():
            # The sample size is irrelevant while "Use all data" is checked.
            self.sample_spin.setEnabled(not self.use_all_checkbox.isChecked())

        self.use_all_checkbox.stateChanged.connect(on_use_all_changed)

        # Modular panels
        self.clustering_panel = ClusteringPanel(self, app_state=self.app_state)
        self.dr_panel = DRPanel(self)
        self.plot_panel = PlotPanel(self)
        self.histogram_panel = HistogramPanel(self)
        self.histogram_panel.set_connection_manager(self._connection_manager)

        # Tab widget: Tab 1 = Visualization, Tab 2 = Distributions
        self.tab_widget = QTabWidget()

        viz_tab = QWidget()
        viz_layout = QVBoxLayout(viz_tab)
        viz_layout.setContentsMargins(0, 0, 0, 0)
        viz_layout.addWidget(self.clustering_panel)
        viz_layout.addWidget(self.dr_panel)
        viz_layout.addWidget(self.plot_panel, stretch=10)
        self.tab_widget.addTab(viz_tab, "Visualization")

        self.tab_widget.addTab(self.histogram_panel, "Distributions")

        self._apply_tab_styling()

        layout.addWidget(self.tab_widget, stretch=10)

        self.status_label = QLabel("No collection selected")
        self.status_label.setStyleSheet("color: gray;")
        self.status_label.setMaximumHeight(30)
        layout.addWidget(self.status_label)

        # Connect DRPanel generate / open-in-browser buttons
        self.dr_panel.generate_button.clicked.connect(self._generate_visualization)
        self.dr_panel.open_browser_button.clicked.connect(self._open_in_browser)

        # Connect ClusteringPanel run button
        self.clustering_panel.cluster_button.clicked.connect(self._run_clustering)

        # Disable action buttons until a collection is selected
        self.set_collection_ready(False)

    def _apply_tab_styling(self) -> None:
        """Add emoji tab titles and, if the user enabled it, accent styling.

        Styling is best-effort: a failure is logged and the tabs keep their
        native appearance instead of aborting UI construction.
        """
        try:
            self.tab_widget.setTabText(0, "🔬 Visualization")
            self.tab_widget.setTabText(1, "📊 Distributions")

            # Only apply the highlight styling when the user explicitly enabled
            # accent styling, so the default look is unchanged for new users.
            settings = self.app_state.settings_service
            if not settings.get_use_accent_enabled():
                return

            highlight = settings.get_highlight_color()
            highlight_bg = settings.get_highlight_color_bg()
            tab_style = (
                f"QTabBar::tab {{ font-weight: {TAB_FONT_WEIGHT}; padding: {TAB_PADDING}; font-size: {TAB_FONT_SIZE};}}"
                f"QTabBar::tab:selected {{ background-color: {highlight_bg}; border-bottom: 2px solid {highlight}; }}"
            )
            self.tab_widget.tabBar().setStyleSheet(tab_style)
        except Exception as exc:
            # Previously swallowed silently; keep it non-fatal but leave a trace.
            log_error("Could not apply tab styling: %s", exc)
1✔
317

318
    def _generate_visualization(self):
        """Start loading data for a new visualization of the current collection.

        Offers to install the "viz" feature first when it is reported as
        unavailable. The load runs in a background thread and continues in
        ``_on_data_loaded`` or ``_on_data_load_error``.
        """
        # The browser button stays off until a fresh plot has been generated.
        self.dr_panel.open_browser_button.setEnabled(False)

        if not self.current_collection:
            QMessageBox.warning(self, "No Collection", "Please select a collection first.")
            return

        # Check that visualization dependencies (sklearn, umap-learn) are installed
        from vector_inspector.core.provider_detection import get_feature_info

        feature = get_feature_info("viz")
        if feature and not feature.available:
            from vector_inspector.ui.dialogs.provider_install_dialog import ProviderInstallDialog

            install_dialog = ProviderInstallDialog(feature, parent=self)
            install_dialog.exec()

            # Look the feature up again to see whether the install happened.
            feature = get_feature_info("viz")
            if feature and not feature.available:
                return
            # Intentional re-entry: the dependency is installed now, so carry
            # on without making the user click Generate a second time.
            self._generate_visualization()
            return

        sample_size = None if self.use_all_checkbox.isChecked() else self.sample_spin.value()
        self._last_sample_size = sample_size

        # Stop and wait for a load that is still in flight.
        previous_loader = self.data_load_thread
        if previous_loader and previous_loader.isRunning():
            previous_loader.quit()
            previous_loader.wait()

        loader = VisualizationDataLoadThread(
            self.connection,
            self.current_collection,
            sample_size,
            parent=self,
        )
        self.data_load_thread = loader
        loader.finished.connect(self._on_data_loaded)
        loader.error.connect(self._on_data_load_error)

        # Show loading dialog during data load
        self.loading_dialog.show_loading("Loading data for visualization...")
        loader.start()
1✔
370

371
    def _on_data_loaded(self, data: dict) -> None:
        """Store the loaded data and start dimensionality reduction in the background.

        Warns and stops when the payload carries no embeddings.
        """
        self.loading_dialog.hide_loading()

        embeddings = data.get("embeddings") if data else None
        if embeddings is None or len(embeddings) == 0:
            QMessageBox.warning(
                self,
                "No Data",
                "No embeddings found in collection. Make sure the collection contains vector embeddings.",
            )
            return

        self.current_data = data
        self.histogram_panel.set_data(
            data,
            collection_name=self.current_collection,
            sample_size=getattr(self, "_last_sample_size", None),
        )
        self.status_label.setText("Reducing dimensions...")
        self.dr_panel.generate_button.setEnabled(False)

        # Translate the panel selections into reduction parameters.
        method = self.dr_panel.method_combo.currentText().lower()
        if method == "t-sne":
            method = "tsne"
        wants_2d = self.dr_panel.dimensions_combo.currentText() == "2D"
        n_components = 2 if wants_2d else 3

        # Run dimensionality reduction in a background thread.
        worker = VisualizationThread(embeddings, method, n_components)
        self.visualization_thread = worker
        worker.finished.connect(self._on_reduction_finished)
        worker.error.connect(self._on_reduction_error)
        worker.feature_missing.connect(self._on_feature_missing)

        self.loading_dialog.show_loading("Reducing dimensions...")
        self._dr_start_time = time.time()
        worker.start()
1✔
413

414
    def _on_data_load_error(self, error_message: str) -> None:
        """Dismiss the loading dialog and tell the user the load failed."""
        self.loading_dialog.hide_loading()
        warning_text = f"Failed to load data: {error_message}"
        QMessageBox.warning(self, "Load Error", warning_text)
422

423
    def _on_reduction_finished(self, reduced_data: Any):
        """Plot the reduced data, re-enable the controls and report timing."""
        self.loading_dialog.hide_loading()
        self.reduced_data = reduced_data

        method_name = self.dr_panel.method_combo.currentText()
        self.plot_panel.create_plot(
            reduced_data=reduced_data,
            current_data=self.current_data,
            cluster_labels=self.cluster_labels,
            method_name=method_name,
        )
        self._save_temp_html()

        self.dr_panel.generate_button.setEnabled(True)
        self.dr_panel.open_browser_button.setEnabled(True)
        self.status_label.setText("Visualization complete")

        # Report to the status bar with timing.
        elapsed = time.time() - self._dr_start_time
        if reduced_data is None:
            n_points = 0
        else:
            n_points = len(reduced_data)
        self.app_state.status_reporter.report_action(
            "Visualization",
            result_count=n_points,
            result_label="point",
            elapsed_seconds=elapsed,
        )
447

448
    def _on_reduction_error(self, error_msg: str):
        """Log and show a reduction failure, then re-enable the Generate button."""
        self.loading_dialog.hide_loading()
        log_error("Visualization failed: %s", error_msg)
        warning_text = f"Visualization failed: {error_msg}"
        QMessageBox.warning(self, "Error", warning_text)
        self.dr_panel.generate_button.setEnabled(True)
        self.status_label.setText("Visualization failed")
1✔
455

456
    def _on_feature_missing(self, feature_id: str) -> None:
        """Offer to install a feature the reduction thread found missing.

        When the dialog reports a successful install, generation is started
        again automatically.
        """
        self.loading_dialog.hide_loading()
        self.dr_panel.generate_button.setEnabled(True)
        self.status_label.setText("")

        from vector_inspector.core.provider_detection import get_feature_info
        from vector_inspector.ui.dialogs.provider_install_dialog import ProviderInstallDialog

        feature = get_feature_info(feature_id)
        if not feature:
            return

        install_dialog = ProviderInstallDialog(feature, parent=self)
        # One-element list used as a mutable flag that the slot can set.
        installed_flag = [False]

        def _remember_install(_: str) -> None:
            installed_flag[0] = True

        install_dialog.provider_installed.connect(_remember_install)
        install_dialog.exec()

        if installed_flag[0]:
            # Intentional re-entry: the missing feature is installed now, so
            # the user does not have to click Generate a second time.
            self._generate_visualization()
×
482

483
    def _save_temp_html(self):
        """Write the current plot HTML to a persistent temp file for browser viewing."""
        html = self.plot_panel.get_current_html()
        if not html:
            return

        # delete=False keeps the file on disk after the handle is closed.
        with tempfile.NamedTemporaryFile(
            delete=False,
            suffix=".html",
            mode="w",
            encoding="utf-8",
        ) as handle:
            handle.write(html)
            handle.flush()
            html_path = handle.name
            self.temp_html_files.append(html_path)
            self._last_temp_html = html_path
1✔
492

493
    def _open_in_browser(self):
        """Open the last generated plot in a web browser.

        Does nothing when no plot has been saved yet.
        """
        if not self._last_temp_html:
            return

        from pathlib import Path

        # Path.as_uri() yields a well-formed file URL on every platform
        # (drive letters, backslashes, percent-escaping); gluing "file://"
        # onto the raw path does not for Windows paths.
        webbrowser.open(Path(self._last_temp_html).as_uri())
1✔
497

498
    def _run_clustering(self):
        """Run clustering on the current collection's data.

        Clusters immediately when data is already loaded. Otherwise starts a
        background load that continues in ``_on_clustering_data_loaded`` or
        ``_on_data_load_error``.
        """
        if not self.current_collection:
            QMessageBox.warning(self, "No Collection", "Please select a collection first.")
            return

        if self.current_data is not None:
            # Data already loaded, proceed with clustering
            self._start_clustering()
            return

        sample_size = None if self.use_all_checkbox.isChecked() else self.sample_spin.value()
        # Record the size used for this load, as _generate_visualization does,
        # so _on_clustering_data_loaded passes the right value to the
        # histogram panel instead of a stale or missing one.
        self._last_sample_size = sample_size

        # Stop and wait for a load that is still in flight.
        if self.data_load_thread and self.data_load_thread.isRunning():
            self.data_load_thread.quit()
            self.data_load_thread.wait()

        # Create and start data load thread for clustering
        self.data_load_thread = VisualizationDataLoadThread(
            self.connection,
            self.current_collection,
            sample_size,
            parent=self,
        )
        self.data_load_thread.finished.connect(self._on_clustering_data_loaded)
        self.data_load_thread.error.connect(self._on_data_load_error)

        # Show loading dialog during data load
        self.loading_dialog.show_loading("Loading data for clustering...")
        self.data_load_thread.start()
1✔
532

533
    def _on_clustering_data_loaded(self, data: dict) -> None:
        """Store data loaded for clustering and start the clustering run.

        Warns and stops when the payload carries no embeddings.
        """
        self.loading_dialog.hide_loading()

        embeddings = data.get("embeddings") if data else None
        if embeddings is None or len(embeddings) == 0:
            QMessageBox.warning(self, "No Data", "No embeddings found in collection.")
            return

        self.current_data = data
        last_sample_size = getattr(self, "_last_sample_size", None)
        self.histogram_panel.set_data(
            data,
            collection_name=self.current_collection,
            sample_size=last_sample_size,
        )
        self._start_clustering()
1✔
558

559
    def _start_clustering(self) -> None:
        """Launch the clustering thread on the data that is already loaded."""
        panel = self.clustering_panel
        algorithm = panel.cluster_algorithm_combo.currentText()
        params = panel.get_clustering_params()

        # Block repeat clicks while the background run is in progress.
        self.loading_dialog.show_loading("Running clustering...")
        panel.cluster_button.setEnabled(False)

        worker = ClusteringThread(self.current_data["embeddings"], algorithm, params)
        self.clustering_thread = worker
        worker.finished.connect(self._on_clustering_finished)
        worker.error.connect(self._on_clustering_error)
        self._cluster_start_time = time.time()
        worker.start()
1✔
574

575
    def _on_clustering_finished(self, result):
        """Publish clustering results: summary text, status report, metadata, plot.

        Args:
            result: ``(labels, algorithm)`` tuple emitted by the clustering thread.
        """
        self.loading_dialog.hide_loading()
        labels, algo = result
        self.cluster_labels = labels

        distinct = set(self.cluster_labels)
        # For these algorithms the label -1 is counted as noise, not a cluster.
        if algo in ("HDBSCAN", "DBSCAN", "OPTICS"):
            n_clusters = sum(1 for label in distinct if label != -1)
            n_noise = list(self.cluster_labels).count(-1)
            msg = f"Found {n_clusters} clusters, {n_noise} noise points"
        else:
            n_clusters = len(distinct)
            msg = f"Found {n_clusters} clusters"

        panel = self.clustering_panel
        panel.cluster_result_label.setText(msg)
        panel.cluster_result_label.setVisible(True)
        self.status_label.setText(msg)
        self.status_label.setStyleSheet("color: green;")
        panel.cluster_button.setEnabled(True)

        # Report to the status bar with timing.
        elapsed = time.time() - self._cluster_start_time
        self.app_state.status_reporter.report_action(
            "Clustering",
            result_count=n_clusters,
            result_label="cluster",
            elapsed_seconds=elapsed,
        )

        # Persist labels only when the user ticked the checkbox.
        if panel.save_to_metadata_checkbox.isChecked():
            self._save_cluster_labels_to_metadata()

        # Without reduced data there is no plot to recolour.
        if self.reduced_data is None:
            return
        self.plot_panel.create_plot(
            reduced_data=self.reduced_data,
            current_data=self.current_data,
            cluster_labels=self.cluster_labels,
            method_name=self.dr_panel.method_combo.currentText(),
        )
        self._save_temp_html()
1✔
620

621
    def _save_cluster_labels_to_metadata(self):
        """Write the current cluster labels into each item's metadata.

        Each updated metadata dict gets a ``cluster`` value and an
        ``updated_at`` timestamp, and the batch is sent through
        ``connection.update_items``. Returns early without saving when there
        is no data, no labels, no connection or no collection. Failures are
        logged and shown as a warning; nothing is raised.
        """
        labels = self.cluster_labels
        # Test for "no labels" by length. The previous ``labels.any()`` check
        # skipped a valid result in which every point is in cluster 0, and
        # raised AttributeError when labels was None or a plain list.
        if not self.current_data or labels is None or len(labels) == 0:
            return

        if not self.connection:
            log_error("Cannot save cluster labels: no database connection")
            return

        if not self.current_collection:
            log_error("Cannot save cluster labels: no collection selected")
            return

        try:
            from datetime import datetime

            ids = self.current_data.get("ids", [])
            metadatas = self.current_data.get("metadatas", [])

            # zip stops at the shortest of ids / metadatas / labels, so only
            # items that have a label are updated.
            updated_metadatas = []
            for _item_id, metadata, label in zip(ids, metadatas, labels):
                # Copy so the cached metadata is not modified before the save succeeds.
                updated_meta = dict(metadata) if metadata else {}
                updated_meta["cluster"] = int(label)
                updated_meta["updated_at"] = datetime.now(UTC).isoformat()
                updated_metadatas.append(updated_meta)

            # Batch update all items with new cluster metadata
            success = self.connection.update_items(
                self.current_collection,
                ids=ids[: len(updated_metadatas)],
                metadatas=updated_metadatas,
            )

            if success:
                log_info("Successfully saved %d cluster labels to metadata", len(updated_metadatas))
                # Update local cache
                self.current_data["metadatas"] = updated_metadatas
            else:
                log_error("Failed to save cluster labels to metadata")
                QMessageBox.warning(
                    self,
                    "Warning",
                    "Clustering complete, but failed to save cluster labels to metadata.",
                )
        except Exception as e:
            log_error("Error saving cluster labels to metadata: %s", e, exc_info=True)
            QMessageBox.warning(self, "Warning", f"Clustering complete, but error saving labels to metadata: {e!s}")
1✔
673

674
    def _on_clustering_error(self, error_msg: str):
        """React to a failed clustering run.

        Hides the loading dialog, logs the failure, shows it to the user in a
        warning dialog, re-enables the cluster button and updates the status
        label.

        Args:
            error_msg: Description of the failure, logged and shown verbatim.
        """
        self.loading_dialog.hide_loading()
        log_error("Clustering failed: %s", error_msg)

        dialog_text = f"Clustering failed: {error_msg}"
        QMessageBox.warning(self, "Error", dialog_text)

        # Allow the user to retry once the dialog has been dismissed.
        cluster_button = self.clustering_panel.cluster_button
        cluster_button.setEnabled(True)
        self.status_label.setText("Clustering failed")
1✔
681

682
    def set_collection_ready(self, ready: bool) -> None:
        """Toggle the generate and cluster buttons to match collection availability.

        Args:
            ready: Passed to ``setEnabled`` on both buttons.  When falsy, the
                buttons also get an explanatory tooltip and the status label is
                set to the same hint in gray.
        """
        if ready:
            tooltip = ""
        else:
            tooltip = "Select a collection to begin"

        generate_button = self.dr_panel.generate_button
        generate_button.setEnabled(ready)
        generate_button.setToolTip(tooltip)

        cluster_button = self.clustering_panel.cluster_button
        cluster_button.setEnabled(ready)
        cluster_button.setToolTip(tooltip)

        if ready:
            return

        # No collection: tell the user what to do next.
        status = self.status_label
        status.setText("Select a collection to begin")
        status.setStyleSheet("color: gray;")
1✔
692

693
    def set_collection(self, collection_name: str):
        """Switch the view to another collection and drop all derived state.

        Stores the collection name, enables the action buttons through
        ``set_collection_ready(True)``, clears the cached data, reduced data and
        cluster labels, hides and empties the clustering result label when it
        exists, and shows the collection name in the status label.

        Args:
            collection_name: Name of the collection to visualize.
        """
        self.current_collection = collection_name
        self.set_collection_ready(True)

        # Everything computed for the previous collection is now stale.
        self.current_data = self.reduced_data = self.cluster_labels = None

        # Clear clustering results when switching collection/provider.
        # Best effort: the panel or its label may not exist.
        try:
            panel = getattr(self, "clustering_panel", None)
            result_label = getattr(panel, "cluster_result_label", None)
            if result_label is not None:
                result_label.setVisible(False)
                result_label.setText("")
        except Exception:
            pass

        self.status_label.setText(f"Collection: {collection_name}")
1✔
709

710
    def _on_view_in_data_browser(self, _point_index: int, point_id: str):
        """Forward a "view in data browser" request for the selected point.

        Emits ``view_in_data_browser_requested`` with the point ID; a falsy ID
        is ignored.

        Args:
            _point_index: Index of the selected point (unused)
            point_id: ID of the selected point
        """
        if not point_id:
            return
        self.view_in_data_browser_requested.emit(point_id)
1✔
719

720
    def cleanup_temp_html(self):
        """Dispose child web views and delete the tracked temporary HTML files.

        Every step is best effort: errors from disposing a panel, processing
        Qt events or removing a file are suppressed so cleanup always runs to
        the end.  ``self.temp_html_files`` is reset to an empty list.
        """
        import contextlib
        import os

        from PySide6.QtWidgets import QApplication

        # Dispose webengine objects in child panels first so pages/views
        # are deleted before the WebEngineProfile is released by Qt.
        for panel_attr in ("plot_panel", "histogram_panel"):
            with contextlib.suppress(Exception):
                panel = getattr(self, panel_attr, None)
                dispose = getattr(panel, "dispose", None)
                if dispose:
                    dispose()

        # Let Qt process deletion events to avoid race conditions
        with contextlib.suppress(Exception):
            QApplication.processEvents()

        # The attribute may not exist, hence the getattr default.
        for html_path in getattr(self, "temp_html_files", []):
            with contextlib.suppress(Exception):
                os.remove(html_path)
        self.temp_html_files = []
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc