• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

anthonypdawson / vector-inspector / 24936808751

25 Apr 2026 05:44PM UTC coverage: 80.4% (-0.08%) from 80.484%
24936808751

Pull #32

github

anthonypdawson
fix: update PDM install command to correctly include development dependencies

Co-authored-by: Copilot <copilot@github.com>
Pull Request #32: Installation progressive enhancement

755 of 927 new or added lines in 16 files covered. (81.45%)

21 existing lines in 2 files now uncovered.

15173 of 18872 relevant lines covered (80.4%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.69
/src/vector_inspector/ui/views/visualization_view.py
1
"""Vector visualization view with dimensionality reduction (modular panels)."""
2

3
from __future__ import annotations
1✔
4

5
import tempfile
1✔
6
import time
1✔
7
import webbrowser
1✔
8
from datetime import UTC
1✔
9
from typing import Any, Optional
1✔
10

11
import numpy as np
1✔
12
from PySide6.QtCore import QThread, Signal
1✔
13
from PySide6.QtWidgets import (
1✔
14
    QCheckBox,
15
    QHBoxLayout,
16
    QLabel,
17
    QMessageBox,
18
    QSpinBox,
19
    QTabWidget,
20
    QVBoxLayout,
21
    QWidget,
22
)
23

24
from vector_inspector.core.connection_manager import ConnectionInstance
1✔
25

26
# Feature flags now accessed via app_state.advanced_features_enabled
27
from vector_inspector.core.logging import log_error, log_info
1✔
28
from vector_inspector.services import ClusterRunner, ThreadedTaskRunner
1✔
29
from vector_inspector.services.visualization_service import VisualizationService
1✔
30
from vector_inspector.state import AppState
1✔
31
from vector_inspector.ui.components.loading_dialog import LoadingDialog
1✔
32
from vector_inspector.ui.styles import (
1✔
33
    TAB_FONT_SIZE,
34
    TAB_FONT_WEIGHT,
35
    TAB_PADDING,
36
)
37
from vector_inspector.ui.views.visualization import ClusteringPanel, DRPanel, HistogramPanel, PlotPanel
1✔
38
from vector_inspector.utils.lazy_imports import FeatureDependencyMissingError
1✔
39

40

41
class VisualizationThread(QThread):
    """Worker thread that runs dimensionality reduction off the UI thread.

    Exactly one of the three signals is emitted per run: ``finished`` with the
    reduced array, ``feature_missing`` with the id of an uninstalled optional
    feature, or ``error`` with a human-readable message.
    """

    # NOTE(review): ``finished`` shares its name with QThread's built-in
    # signal — confirm the override is intentional.
    finished = Signal(np.ndarray)
    error = Signal(str)
    feature_missing = Signal(str)  # emits feature_id when a dep is not installed

    def __init__(self, embeddings, method, n_components):
        """Store the reduction inputs; the work itself happens in ``run``."""
        super().__init__()
        self.embeddings = embeddings
        self.method = method
        self.n_components = n_components

    def run(self):
        """Reduce the stored embeddings and report the outcome via signals."""
        try:
            projected = VisualizationService.reduce_dimensions(
                self.embeddings,
                method=self.method,
                n_components=self.n_components,
            )
            # The service signals failure by returning None.
            if projected is None:
                self.error.emit("Dimensionality reduction failed")
            else:
                self.finished.emit(projected)
        except FeatureDependencyMissingError as missing:
            # Optional dependency is absent: let the view offer installation.
            self.feature_missing.emit(missing.feature_id)
        except Exception as failure:
            log_error("Dimensionality reduction failed: %s", failure, exc_info=True)
            self.error.emit(str(failure))
69

70

71
class ClusteringThread(QThread):
    """Worker thread that clusters embeddings off the UI thread.

    Emits ``finished`` with a ``(labels, algorithm)`` tuple on success, or
    ``error`` with the exception text on failure.
    """

    # NOTE(review): ``finished`` shares its name with QThread's built-in
    # signal — confirm the override is intentional.
    finished = Signal(object)  # cluster_labels, algorithm
    error = Signal(str)

    def __init__(self, embeddings, algorithm, params):
        """Store the clustering inputs; the work itself happens in ``run``."""
        super().__init__()
        self.embeddings = embeddings
        self.algorithm = algorithm
        self.params = params

    def run(self):
        """Cluster the stored embeddings and report the outcome via signals."""
        try:
            # Imported here rather than at module import time.
            from vector_inspector.core.clustering import run_clustering

            outcome = run_clustering(self.embeddings, self.algorithm, self.params)
            labels, algorithm = outcome
            self.finished.emit((labels, algorithm))
        except Exception as failure:
            log_error("Clustering failed: %s", failure, exc_info=True)
            self.error.emit(str(failure))
93

94

95
class VisualizationDataLoadThread(QThread):
    """Worker thread that fetches collection items for visualization.

    Emits ``finished`` with the loaded data dict, or ``error`` with a message
    when there is no connection, nothing was returned, or the fetch raised.
    """

    # NOTE(review): ``finished`` shares its name with QThread's built-in
    # signal — confirm the override is intentional.
    finished = Signal(dict)  # data
    error = Signal(str)

    def __init__(self, connection, collection, sample_size, parent=None):
        """Store what to load; ``sample_size`` of None means no limit is passed."""
        super().__init__(parent)
        self.connection = connection
        self.collection = collection
        self.sample_size = sample_size

    def run(self):
        """Load items from the collection and report the outcome via signals."""
        try:
            if not self.connection:
                self.error.emit("No database connection available")
                return

            # Only pass ``limit`` when a sample size was requested.
            limit_kwargs = {} if self.sample_size is None else {"limit": self.sample_size}
            loaded = self.connection.get_all_items(self.collection, **limit_kwargs)

            if not loaded:
                self.error.emit("Failed to load data")
            else:
                self.finished.emit(loaded)
        except Exception as failure:
            log_error("Visualization data load failed: %s", failure, exc_info=True)
            self.error.emit(str(failure))
126

127

128
class VisualizationView(QWidget):
    """View for visualizing vectors in 2D/3D using modular panels.

    The widget combines shared sampling controls with four panels
    (clustering, dimensionality reduction, plot and histogram). Data loading,
    dimensionality reduction and clustering each run on their own ``QThread``
    and report back through the ``_on_*`` slots of this class.
    """

    # Signal emitted when user wants to view a point in data browser
    view_in_data_browser_requested = Signal(str)  # item_id

    app_state: AppState
    task_runner: ThreadedTaskRunner
    cluster_runner: ClusterRunner

    def __init__(
        self,
        app_state: AppState,
        task_runner: ThreadedTaskRunner,
        connection_manager=None,
        parent=None,
    ):
        """Create the view, build its UI and subscribe to ``app_state``.

        Args:
            app_state: Shared application state (provider, settings, signals).
            task_runner: Runner stored on the view for background tasks.
            connection_manager: Optional manager handed to the histogram panel.
            parent: Optional parent widget.
        """
        super().__init__(parent)

        # Store AppState and task runner
        self.app_state = app_state
        self.task_runner = task_runner
        self.cluster_runner = ClusterRunner()
        self.connection = self.app_state.provider

        self.current_collection = ""
        self.current_data = None
        self.reduced_data = None
        self.visualization_thread = None
        self.data_load_thread = None
        self.clustering_thread = None
        self.temp_html_files = []
        self.cluster_labels = None
        self._last_temp_html = None
        # Sample size used for the most recent data load (None = all data).
        self._last_sample_size: Optional[int] = None
        self.loading_dialog = LoadingDialog("Loading visualization...", self)
        self._connection_manager = connection_manager
        # Timers for status reporting
        self._dr_start_time: float = 0.0
        self._cluster_start_time: float = 0.0
        self._setup_ui()
        self._connect_plot_signals()

        # Connect to AppState signals
        self._connect_state_signals()
        # Update services with current connection if available
        if self.app_state.provider:
            self._on_provider_changed(self.app_state.provider)

    # ------------------------------------------------------------------
    # AppState wiring
    # ------------------------------------------------------------------

    def _connect_state_signals(self) -> None:
        """Subscribe to AppState changes."""
        state = self.app_state
        # React to connection changes
        state.provider_changed.connect(self._on_provider_changed)
        # React to collection changes
        state.collection_changed.connect(self._on_collection_changed)
        # React to loading state
        state.loading_started.connect(self._on_loading_started)
        state.loading_finished.connect(self._on_loading_finished)
        # React to errors
        state.error_occurred.connect(self._on_error)

    def _on_provider_changed(self, connection: Optional[ConnectionInstance]) -> None:
        """React to provider/connection change."""
        self.connection = connection
        self.histogram_panel.set_connection(connection)
        # Connection change means collection is no longer known — disable actions
        self.set_collection_ready(False)

    def _on_collection_changed(self, collection: str) -> None:
        """React to collection change; an empty name disables the actions."""
        if collection:
            self.set_collection(collection)
        else:
            self.set_collection_ready(False)

    def _on_loading_started(self, message: str) -> None:
        """Show the loading dialog with ``message``."""
        self.loading_dialog.show_loading(message)

    def _on_loading_finished(self) -> None:
        """Hide the loading dialog."""
        self.loading_dialog.hide()

    def _on_error(self, title: str, message: str) -> None:
        """Show an application error in a critical message box."""
        QMessageBox.critical(self, title, message)

    def _connect_plot_signals(self):
        """Connect plot panel signals."""
        self.plot_panel.view_in_data_browser.connect(self._on_view_in_data_browser)

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------

    def _setup_ui(self):
        """Build the widget tree and wire the panel buttons."""
        layout = QVBoxLayout(self)
        advanced = self.app_state.advanced_features_enabled

        # Shared controls (sample size + use all data)
        shared_layout = QHBoxLayout()
        shared_layout.addWidget(QLabel("Sample size:"))
        self.sample_spin = QSpinBox()
        self.sample_spin.setMinimum(10)
        # Feature gating: limit sample size in free version
        self.sample_spin.setMaximum(10000 if advanced else 500)
        self.sample_spin.setValue(500)
        self.sample_spin.setSingleStep(100)
        shared_layout.addWidget(self.sample_spin)
        self.use_all_checkbox = QCheckBox("Use all data")
        shared_layout.addWidget(self.use_all_checkbox)
        shared_layout.addStretch()
        layout.addLayout(shared_layout)

        # Feature gating: disable "Use all data" in free version
        if not advanced:
            self.use_all_checkbox.setEnabled(False)
            self.use_all_checkbox.setToolTip(self.app_state.get_feature_tooltip())

        def on_use_all_changed():
            # The sample size is irrelevant while "Use all data" is ticked.
            self.sample_spin.setEnabled(not self.use_all_checkbox.isChecked())

        self.use_all_checkbox.stateChanged.connect(on_use_all_changed)

        # Modular panels
        self.clustering_panel = ClusteringPanel(self, app_state=self.app_state)
        self.dr_panel = DRPanel(self)
        self.plot_panel = PlotPanel(self)
        self.histogram_panel = HistogramPanel(self)
        self.histogram_panel.set_connection_manager(self._connection_manager)

        # Tab widget: Tab 1 = Visualization, Tab 2 = Distributions
        self.tab_widget = QTabWidget()

        viz_tab = QWidget()
        viz_layout = QVBoxLayout(viz_tab)
        viz_layout.setContentsMargins(0, 0, 0, 0)
        viz_layout.addWidget(self.clustering_panel)
        viz_layout.addWidget(self.dr_panel)
        viz_layout.addWidget(self.plot_panel, stretch=10)
        self.tab_widget.addTab(viz_tab, "Visualization")

        self.tab_widget.addTab(self.histogram_panel, "Distributions")

        # Make tabs more noticeable: add emoji and slightly heavier styling.
        # Best-effort: styling must never prevent the view from being built,
        # but a failure is logged instead of being silently discarded.
        try:
            self.tab_widget.setTabText(0, "🔬 Visualization")
            self.tab_widget.setTabText(1, "📊 Distributions")
            self._apply_tab_accent_style()
        except Exception as exc:
            log_info("Visualization tab styling skipped: %s", exc)

        layout.addWidget(self.tab_widget, stretch=10)

        self.status_label = QLabel("No collection selected")
        self.status_label.setStyleSheet("color: gray;")
        self.status_label.setMaximumHeight(30)
        layout.addWidget(self.status_label)

        # Connect DRPanel generate button
        self.dr_panel.generate_button.clicked.connect(self._generate_visualization)
        self.dr_panel.open_browser_button.clicked.connect(self._open_in_browser)

        # Connect ClusteringPanel run button
        self.clustering_panel.cluster_button.clicked.connect(self._run_clustering)

        # Disable action buttons until a collection is selected
        self.set_collection_ready(False)

    def _apply_tab_accent_style(self) -> None:
        """Style the tab bar with the user's highlight colours, if enabled.

        Only applied when the user explicitly enabled accent styling, which
        avoids unexpectedly changing the default widget appearance.
        """
        settings = self.app_state.settings_service
        if not settings.get_use_accent_enabled():
            return  # leave native tab styling

        highlight = settings.get_highlight_color()
        highlight_bg = settings.get_highlight_color_bg()
        tab_style = (
            f"QTabBar::tab {{ font-weight: {TAB_FONT_WEIGHT}; padding: {TAB_PADDING}; font-size: {TAB_FONT_SIZE};}}"
            f"QTabBar::tab:selected {{ background-color: {highlight_bg}; border-bottom: 2px solid {highlight}; }}"
        )
        self.tab_widget.tabBar().setStyleSheet(tab_style)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _has_embeddings(data) -> bool:
        """Return True when ``data`` is a non-empty dict with non-empty embeddings."""
        if not data:
            return False
        embeddings = data.get("embeddings")
        # len() rather than truthiness: embeddings may be an array type whose
        # truth value is ambiguous.
        return embeddings is not None and len(embeddings) > 0

    def _selected_sample_size(self) -> Optional[int]:
        """Return the sample size from the shared controls (None = all data)."""
        if self.use_all_checkbox.isChecked():
            return None
        return self.sample_spin.value()

    def _stop_running_data_load(self) -> None:
        """Wait for an in-flight data load thread, if any, before replacing it."""
        # NOTE(review): VisualizationDataLoadThread.run() does not start an
        # event loop, so quit() presumably has no effect and wait() blocks
        # until the load completes — confirm this is acceptable on the UI thread.
        thread = self.data_load_thread
        if thread and thread.isRunning():
            thread.quit()
            thread.wait()

    def _start_data_load(self, sample_size: Optional[int], on_loaded, message: str) -> None:
        """Start a background load of the current collection.

        Args:
            sample_size: Maximum number of items to load, or None for all.
            on_loaded: Slot connected to the thread's ``finished`` signal.
            message: Text shown in the loading dialog while loading.
        """
        # Cancel any existing data load thread
        self._stop_running_data_load()

        self.data_load_thread = VisualizationDataLoadThread(
            self.connection,
            self.current_collection,
            sample_size,
            parent=self,
        )
        self.data_load_thread.finished.connect(on_loaded)
        self.data_load_thread.error.connect(self._on_data_load_error)

        self.loading_dialog.show_loading(message)
        self.data_load_thread.start()

    def _refresh_plot(self) -> None:
        """Redraw the plot from the current state and save it for the browser."""
        self.plot_panel.create_plot(
            reduced_data=self.reduced_data,
            current_data=self.current_data,
            cluster_labels=self.cluster_labels,
            method_name=self.dr_panel.method_combo.currentText(),
        )
        self._save_temp_html()

    # ------------------------------------------------------------------
    # Visualization (dimensionality reduction)
    # ------------------------------------------------------------------

    def _generate_visualization(self):
        """Generate visualization of vectors.

        Checks that a collection is selected and that the ``viz`` feature is
        installed (offering the install dialog otherwise), then loads the data
        in a background thread; ``_on_data_loaded`` continues from there.
        """
        # Disable browser button until plot is generated
        self.dr_panel.open_browser_button.setEnabled(False)

        if not self.current_collection:
            QMessageBox.warning(self, "No Collection", "Please select a collection first.")
            return

        # Check that visualization dependencies (sklearn, umap-learn) are installed
        from vector_inspector.core.provider_detection import get_feature_info

        viz_feature = get_feature_info("viz")
        if viz_feature and not viz_feature.available:
            from vector_inspector.ui.dialogs.provider_install_dialog import ProviderInstallDialog

            dlg = ProviderInstallDialog(viz_feature, parent=self)
            dlg.exec()

            # Re-check after the dialog: give up if still not installed,
            # otherwise restart the whole flow now that the feature exists.
            viz_feature = get_feature_info("viz")
            if viz_feature and not viz_feature.available:
                return
            self._generate_visualization()
            return

        sample_size = self._selected_sample_size()
        self._last_sample_size = sample_size

        self._start_data_load(
            sample_size,
            self._on_data_loaded,
            "Loading data for visualization...",
        )

    def _on_data_loaded(self, data: dict) -> None:
        """Handle successful data load by starting dimensionality reduction."""
        self.loading_dialog.hide_loading()

        if not self._has_embeddings(data):
            QMessageBox.warning(
                self,
                "No Data",
                "No embeddings found in collection. Make sure the collection contains vector embeddings.",
            )
            return

        self.current_data = data
        self.histogram_panel.set_data(
            data,
            collection_name=self.current_collection,
            # getattr keeps this slot usable on instances built without __init__.
            sample_size=getattr(self, "_last_sample_size", None),
        )
        self.status_label.setText("Reducing dimensions...")
        self.dr_panel.generate_button.setEnabled(False)

        # Get parameters
        method = self.dr_panel.method_combo.currentText().lower()
        if method == "t-sne":
            method = "tsne"
        n_components = 2 if self.dr_panel.dimensions_combo.currentText() == "2D" else 3

        # Run dimensionality reduction in background thread
        self.visualization_thread = VisualizationThread(data["embeddings"], method, n_components)
        self.visualization_thread.finished.connect(self._on_reduction_finished)
        self.visualization_thread.error.connect(self._on_reduction_error)
        self.visualization_thread.feature_missing.connect(self._on_feature_missing)
        # Show loading during reduction
        self.loading_dialog.show_loading("Reducing dimensions...")
        self._dr_start_time = time.time()
        self.visualization_thread.start()

    def _on_data_load_error(self, error_message: str) -> None:
        """Handle data load error."""
        self.loading_dialog.hide_loading()
        QMessageBox.warning(
            self,
            "Load Error",
            f"Failed to load data: {error_message}",
        )

    def _on_reduction_finished(self, reduced_data: Any):
        """Handle dimensionality reduction completion: plot and report timing."""
        self.loading_dialog.hide_loading()
        self.reduced_data = reduced_data
        self._refresh_plot()
        self.dr_panel.generate_button.setEnabled(True)
        self.dr_panel.open_browser_button.setEnabled(True)
        self.status_label.setText("Visualization complete")

        # Report to status bar with timing
        elapsed = time.time() - self._dr_start_time
        n_points = len(reduced_data) if reduced_data is not None else 0
        self.app_state.status_reporter.report_action(
            "Visualization",
            result_count=n_points,
            result_label="point",
            elapsed_seconds=elapsed,
        )

    def _on_reduction_error(self, error_msg: str):
        """Handle dimensionality reduction error."""
        self.loading_dialog.hide_loading()
        log_error("Visualization failed: %s", error_msg)
        QMessageBox.warning(self, "Error", f"Visualization failed: {error_msg}")
        self.dr_panel.generate_button.setEnabled(True)
        self.status_label.setText("Visualization failed")

    def _on_feature_missing(self, feature_id: str) -> None:
        """Open the install dialog when a reduction dep is absent at runtime."""
        self.loading_dialog.hide_loading()
        self.dr_panel.generate_button.setEnabled(True)
        self.status_label.setText("")

        from vector_inspector.core.provider_detection import get_feature_info
        from vector_inspector.ui.dialogs.provider_install_dialog import ProviderInstallDialog

        feature = get_feature_info(feature_id)
        if not feature:
            return

        dlg = ProviderInstallDialog(feature, parent=self)
        provider_was_installed = False

        def _mark_provider_installed(_: str) -> None:
            nonlocal provider_was_installed
            provider_was_installed = True

        dlg.provider_installed.connect(_mark_provider_installed)
        dlg.exec()

        # Retry automatically only when the dialog reported an installation.
        if provider_was_installed:
            self._generate_visualization()

    def _save_temp_html(self):
        """Save current plot HTML to temp file for browser viewing."""
        html = self.plot_panel.get_current_html()
        if not html:
            return
        # delete=False: the file must outlive this call so the browser can
        # open it; cleanup_temp_html() removes it later.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".html", mode="w", encoding="utf-8") as temp_file:
            temp_file.write(html)
            temp_file.flush()
            self.temp_html_files.append(temp_file.name)
            self._last_temp_html = temp_file.name

    def _open_in_browser(self):
        """Open the last generated plot in a web browser."""
        if not self._last_temp_html:
            return
        from pathlib import Path

        # as_uri() builds a well-formed file URL on every platform; naive
        # "file://" + path concatenation is malformed for Windows paths.
        webbrowser.open(Path(self._last_temp_html).absolute().as_uri())

    # ------------------------------------------------------------------
    # Clustering
    # ------------------------------------------------------------------

    def _run_clustering(self):
        """Run clustering on current data, loading it first when necessary."""
        if not self.current_collection:
            QMessageBox.warning(self, "No Collection", "Please select a collection first.")
            return

        if self.current_data is not None:
            # Data already loaded, proceed with clustering
            self._start_clustering()
            return

        # Load data if not already loaded. Record the sample size so the
        # histogram panel is told what was actually loaded.
        sample_size = self._selected_sample_size()
        self._last_sample_size = sample_size
        self._start_data_load(
            sample_size,
            self._on_clustering_data_loaded,
            "Loading data for clustering...",
        )

    def _on_clustering_data_loaded(self, data: dict) -> None:
        """Handle successful data load for clustering."""
        self.loading_dialog.hide_loading()

        if not self._has_embeddings(data):
            QMessageBox.warning(
                self,
                "No Data",
                "No embeddings found in collection.",
            )
            return

        self.current_data = data
        self.histogram_panel.set_data(
            data,
            collection_name=self.current_collection,
            # getattr keeps this slot usable on instances built without __init__.
            sample_size=getattr(self, "_last_sample_size", None),
        )
        self._start_clustering()

    def _start_clustering(self) -> None:
        """Start clustering with already loaded data."""
        # Get algorithm and parameters from panel
        algorithm = self.clustering_panel.cluster_algorithm_combo.currentText()
        params = self.clustering_panel.get_clustering_params()

        # Run clustering in background thread
        self.loading_dialog.show_loading("Running clustering...")
        self.clustering_panel.cluster_button.setEnabled(False)

        self.clustering_thread = ClusteringThread(self.current_data["embeddings"], algorithm, params)
        self.clustering_thread.finished.connect(self._on_clustering_finished)
        self.clustering_thread.error.connect(self._on_clustering_error)
        self._cluster_start_time = time.time()
        self.clustering_thread.start()

    def _on_clustering_finished(self, result):
        """Handle clustering completion.

        Args:
            result: ``(labels, algorithm)`` tuple emitted by ``ClusteringThread``.
        """
        self.loading_dialog.hide_loading()
        labels, algo = result
        self.cluster_labels = labels

        # Count clusters
        unique_labels = set(self.cluster_labels)
        if algo in ["HDBSCAN", "DBSCAN", "OPTICS"]:
            # For these algorithms the label -1 is counted as noise, not a cluster.
            n_clusters = len([label for label in unique_labels if label != -1])
            n_noise = list(self.cluster_labels).count(-1)
            msg = f"Found {n_clusters} clusters, {n_noise} noise points"
        else:
            n_clusters = len(unique_labels)
            msg = f"Found {n_clusters} clusters"

        # Update clustering result label in panel
        self.clustering_panel.cluster_result_label.setText(msg)
        self.clustering_panel.cluster_result_label.setVisible(True)
        self.status_label.setText(msg)
        self.status_label.setStyleSheet("color: green;")
        self.clustering_panel.cluster_button.setEnabled(True)

        # Report to status bar with timing
        elapsed = time.time() - self._cluster_start_time
        self.app_state.status_reporter.report_action(
            "Clustering",
            result_count=n_clusters,
            result_label="cluster",
            elapsed_seconds=elapsed,
        )

        # Save cluster labels to metadata if checkbox is checked
        if self.clustering_panel.save_to_metadata_checkbox.isChecked():
            self._save_cluster_labels_to_metadata()

        # Recreate plot with cluster colors if we have reduced data
        if self.reduced_data is not None:
            self._refresh_plot()

    def _save_cluster_labels_to_metadata(self):
        """Save cluster labels to item metadata in the database.

        Each item's metadata is copied and given a ``cluster`` label and an
        ``updated_at`` timestamp, then written with a single ``update_items``
        call. Failures are logged and shown as a warning; nothing is raised.
        """
        labels = self.cluster_labels
        # Check for presence and length, not truthiness of the values: a run
        # where every point is in cluster 0 must still be saved, and labels
        # may be None if clustering has not produced a result.
        if not self.current_data or labels is None or len(labels) == 0:
            return

        if not self.connection:
            log_error("Cannot save cluster labels: no database connection")
            return

        if not self.current_collection:
            log_error("Cannot save cluster labels: no collection selected")
            return

        try:
            from datetime import datetime

            ids = self.current_data.get("ids", [])
            metadatas = self.current_data.get("metadatas", [])

            # One timestamp for the whole batch.
            stamp = datetime.now(UTC).isoformat()

            # zip() stops at the shortest input, so items without a label
            # (or labels without an item) are skipped.
            updated_metadatas = []
            for metadata, label in zip(metadatas, labels):
                # Copy so the cached metadata is untouched if the update fails.
                updated_meta = dict(metadata) if metadata else {}
                updated_meta["cluster"] = int(label)
                updated_meta["updated_at"] = stamp
                updated_metadatas.append(updated_meta)

            # Batch update all items with new cluster metadata
            success = self.connection.update_items(
                self.current_collection,
                ids=ids[: len(updated_metadatas)],
                metadatas=updated_metadatas,
            )

            if success:
                log_info("Successfully saved %d cluster labels to metadata", len(updated_metadatas))
                # Update local cache
                self.current_data["metadatas"] = updated_metadatas
            else:
                log_error("Failed to save cluster labels to metadata")
                QMessageBox.warning(
                    self,
                    "Warning",
                    "Clustering complete, but failed to save cluster labels to metadata.",
                )
        except Exception as e:
            log_error("Error saving cluster labels to metadata: %s", e, exc_info=True)
            QMessageBox.warning(self, "Warning", f"Clustering complete, but error saving labels to metadata: {e!s}")

    def _on_clustering_error(self, error_msg: str):
        """Handle clustering error."""
        self.loading_dialog.hide_loading()
        log_error("Clustering failed: %s", error_msg)
        QMessageBox.warning(self, "Error", f"Clustering failed: {error_msg}")
        self.clustering_panel.cluster_button.setEnabled(True)
        self.status_label.setText("Clustering failed")

    # ------------------------------------------------------------------
    # Collection state
    # ------------------------------------------------------------------

    def set_collection_ready(self, ready: bool) -> None:
        """Enable or disable action buttons based on whether a collection is selected."""
        tooltip = "" if ready else "Select a collection to begin"
        for button in (self.dr_panel.generate_button, self.clustering_panel.cluster_button):
            button.setEnabled(ready)
            button.setToolTip(tooltip)
        if not ready:
            self.status_label.setText("Select a collection to begin")
            self.status_label.setStyleSheet("color: gray;")

    def set_collection(self, collection_name: str):
        """Set the current collection to visualize and drop cached results."""
        self.current_collection = collection_name
        self.set_collection_ready(True)
        self.current_data = None
        self.reduced_data = None
        self.cluster_labels = None
        # Clear clustering results when switching collection/provider.
        # Best-effort: a failure here must not block the switch, but is logged.
        try:
            if hasattr(self, "clustering_panel") and hasattr(self.clustering_panel, "cluster_result_label"):
                self.clustering_panel.cluster_result_label.setVisible(False)
                self.clustering_panel.cluster_result_label.setText("")
        except Exception as exc:
            log_info("Could not clear clustering result label: %s", exc)

        self.status_label.setText(f"Collection: {collection_name}")

    def _on_view_in_data_browser(self, _point_index: int, point_id: str):
        """Handle button click to view selected point in data browser.

        Args:
            _point_index: Index of the selected point (unused)
            point_id: ID of the selected point
        """
        if point_id:
            self.view_in_data_browser_requested.emit(point_id)

    def cleanup_temp_html(self):
        """Dispose the web panels and delete the temporary HTML files."""
        import contextlib
        import os

        from PySide6.QtWidgets import QApplication

        # Dispose webengine objects in child panels first so pages/views
        # are deleted before the WebEngineProfile is released by Qt.
        # Everything here is best-effort: errors are suppressed on purpose.
        for panel_name in ("plot_panel", "histogram_panel"):
            with contextlib.suppress(Exception):
                dispose = getattr(getattr(self, panel_name, None), "dispose", None)
                if dispose:
                    dispose()

        # Let Qt process deletion events to avoid race conditions
        with contextlib.suppress(Exception):
            QApplication.processEvents()

        for f in getattr(self, "temp_html_files", []):
            with contextlib.suppress(Exception):
                os.remove(f)
        self.temp_html_files = []
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc