• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

DHARPA-Project / kiara_plugin.network_analysis / 16113187676

07 Jul 2025 09:28AM UTC coverage: 54.789% (+0.1%) from 54.641%
16113187676

push

github

makkus
build: add marimo dependency

84 of 163 branches covered (51.53%)

Branch coverage included in aggregate %.

591 of 1069 relevant lines covered (55.29%)

2.76 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

63.13
/src/kiara_plugin/network_analysis/models/__init__.py
1
# -*- coding: utf-8 -*-
2

3
"""This module contains the metadata (and other) models that are used in the ``kiara_plugin.network_analysis`` package.
4

5
Those models are convenience wrappers that make it easier for *kiara* to find, create, manage and version metadata -- but also
6
other types of models -- that is attached to data, as well as *kiara* modules.
7

8
Metadata models must be a sub-class of [kiara.metadata.MetadataModel][kiara.metadata.MetadataModel]. Other models usually
9
sub-class a pydantic BaseModel or implement custom base classes.
10
"""
11

12
from typing import (
5✔
13
    TYPE_CHECKING,
14
    ClassVar,
15
    Dict,
16
    Iterable,
17
    List,
18
    Literal,
19
    Protocol,
20
    Set,
21
    Type,
22
    TypeVar,
23
    Union,
24
)
25

26
from pydantic import BaseModel, Field
5✔
27

28
from kiara.exceptions import KiaraException
5✔
29
from kiara.models import KiaraModel
5✔
30
from kiara.models.values.value import Value
5✔
31
from kiara.models.values.value_metadata import ValueMetadata
5✔
32
from kiara_plugin.network_analysis.defaults import (
5✔
33
    ATTRIBUTE_PROPERTY_KEY,
34
    COMPONENT_ID_COLUMN_NAME,
35
    CONNECTIONS_COLUMN_NAME,
36
    CONNECTIONS_MULTI_COLUMN_NAME,
37
    COUNT_DIRECTED_COLUMN_NAME,
38
    COUNT_IDX_DIRECTED_COLUMN_NAME,
39
    COUNT_IDX_UNDIRECTED_COLUMN_NAME,
40
    COUNT_UNDIRECTED_COLUMN_NAME,
41
    EDGE_ID_COLUMN_NAME,
42
    EDGES_TABLE_NAME,
43
    IN_DIRECTED_COLUMN_NAME,
44
    IN_DIRECTED_MULTI_COLUMN_NAME,
45
    LABEL_COLUMN_NAME,
46
    NODE_ID_COLUMN_NAME,
47
    NODES_TABLE_NAME,
48
    OUT_DIRECTED_COLUMN_NAME,
49
    OUT_DIRECTED_MULTI_COLUMN_NAME,
50
    SOURCE_COLUMN_NAME,
51
    TARGET_COLUMN_NAME,
52
    UNWEIGHTED_DEGREE_CENTRALITY_COLUMN_NAME,
53
    UNWEIGHTED_DEGREE_CENTRALITY_MULTI_COLUMN_NAME,
54
    GraphType,
55
)
56
from kiara_plugin.network_analysis.utils import (
5✔
57
    augment_edges_table_with_id_and_weights,
58
    augment_nodes_table_with_connection_counts,
59
    augment_tables_with_component_ids,
60
    extract_networkx_edges_as_table,
61
    extract_networkx_nodes_as_table,
62
)
63
from kiara_plugin.tabular.models.tables import KiaraTables
5✔
64

65
if TYPE_CHECKING:
5✔
66
    import networkx as nx
×
67
    import pyarrow as pa
×
68
    import rustworkx as rx
×
69

70
    from kiara_plugin.network_analysis.models.metadata import (
×
71
        NetworkNodeAttributeMetadata,
72
    )
73
    from kiara_plugin.tabular.models.table import KiaraTable
×
74

75
# Type variable whose upper bound is ``nx.Graph`` (given as a string forward
# reference because networkx is only imported under TYPE_CHECKING).
NETWORKX_GRAPH_TYPE = TypeVar("NETWORKX_GRAPH_TYPE", bound="nx.Graph")
5✔
76
# Type variable constrained to exactly ``rx.PyGraph`` or ``rx.PyDiGraph`` (string
# forward references because rustworkx is only imported under TYPE_CHECKING).
RUSTWORKX_GRAPH_TYPE = TypeVar("RUSTWORKX_GRAPH_TYPE", "rx.PyGraph", "rx.PyDiGraph")
5✔
77

78

79
class NodesCallback(Protocol):
    """Structural type for callables that are handed one node at a time.

    A conforming callable takes the node id as ``_node_id`` plus arbitrary
    keyword arguments, and returns nothing.
    """

    def __call__(self, _node_id: int, **kwargs) -> None:
        ...
81

82

83
class EdgesCallback(Protocol):
    """Structural type for callables that are handed one edge at a time.

    A conforming callable takes the edge endpoints as ``_source`` and ``_target``
    plus arbitrary keyword arguments, and returns nothing.
    """

    def __call__(self, _source: int, _target: int, **kwargs) -> None:
        ...
85

86

87
class NetworkData(KiaraTables):
5✔
88
    """A flexible, graph-type agnostic wrapper class for network datasets.
89

90
    This class provides a unified interface for working with network data that can represent
91
    any type of graph structure: directed, undirected, simple, or multi-graphs. The design
92
    philosophy emphasizes flexibility and performance while maintaining a clean, intuitive API.
93

94
    **Design Philosophy:**
95
    - **Graph Type Agnostic**: Supports all graph types (directed/undirected, simple/multi)
96
      within the same data structure without requiring type-specific conversions
97
    - **Efficient Storage**: Uses Apache Arrow tables for high-performance columnar storage
98
    - **Flexible Querying**: Provides SQL-based querying capabilities alongside programmatic access
99
    - **Seamless Export**: Easy conversion to NetworkX and RustWorkX graph objects, other representations possible in the future
100
    - **Metadata Rich**: Automatically computes and stores graph statistics and properties
101

102
    **Internal Structure:**
103
    The network data is stored as two Arrow tables:
104
    - **nodes table**: Contains node information with required columns '_node_id' (int) and '_label' (str)
105
    - **edges table**: Contains edge information with required columns '_source' (int) and '_target' (int)
106

107
    Additional computed columns (prefixed with '_') provide graph statistics for different interpretations:
108
    - Degree counts for directed/undirected graphs
109
    - Multi-edge counts and indices
110
    - Centrality measures
111

112
    **Graph Type Support:**
113
    - **Simple Graphs**: Single edges between node pairs
114
    - **Multi-graphs**: Multiple edges between the same node pairs
115
    - **Directed Graphs**: One-way edges with source → target semantics
116
    - **Undirected Graphs**: Bidirectional edges
117
    - **Mixed Types**: The same data can be interpreted as different graph types
118

119
    **Note:** Column names prefixed with '_' have internal meaning and are automatically
120
    computed. Original attributes from source data are stored without the prefix.
121
    """
122

123
    _kiara_model_id: ClassVar = "instance.network_data"
5✔
124

125
    @classmethod
    def create_augmented(
        cls,
        network_data: "NetworkData",
        additional_edges_columns: Union[None, Dict[str, "pa.Array"]] = None,
        additional_nodes_columns: Union[None, Dict[str, "pa.Array"]] = None,
        nodes_column_metadata: Union[Dict[str, Dict[str, KiaraModel]], None] = None,
        edges_column_metadata: Union[Dict[str, Dict[str, KiaraModel]], None] = None,
    ) -> "NetworkData":
        """Build a new NetworkData from an existing one plus extra columns.

        The Arrow tables of ``network_data`` are taken as they are, the supplied
        columns are appended, and the result is passed to ``create_network_data``
        with ``augment_tables=False``. The internal (``_``-prefixed) columns are
        therefore carried over rather than recomputed.

        Args:
            network_data: the NetworkData instance to start from
            additional_edges_columns: mapping of new column name to PyArrow array,
                appended to the edges table in mapping order (``None``: nothing added)
            additional_nodes_columns: mapping of new column name to PyArrow array,
                appended to the nodes table in mapping order (``None``: nothing added)
            nodes_column_metadata: forwarded unchanged to ``create_network_data``
            edges_column_metadata: forwarded unchanged to ``create_network_data``

        Returns:
            NetworkData: a new instance; ``network_data`` itself is not modified here

        Example:
            ```python
            import pyarrow as pa

            # Add a constant weight column to the edges (one value per edge)
            weights = pa.array([1.0] * network_data.num_edges)
            augmented = NetworkData.create_augmented(
                network_data,
                additional_edges_columns={"weight": weights}
            )
            ```
        """

        def _append_all(table, extra_columns):
            # ``append_column`` returns a new table, so the result must be re-bound.
            if extra_columns is None:
                return table
            for column_name, column_values in extra_columns.items():
                table = table.append_column(column_name, column_values)
            return table

        nodes_table = network_data.nodes.arrow_table
        edges_table = network_data.edges.arrow_table

        edges_table = _append_all(edges_table, additional_edges_columns)
        nodes_table = _append_all(nodes_table, additional_nodes_columns)

        return NetworkData.create_network_data(
            nodes_table=nodes_table,
            edges_table=edges_table,
            augment_tables=False,
            nodes_column_metadata=nodes_column_metadata,
            edges_column_metadata=edges_column_metadata,
        )
188

189
    @classmethod
    def create_network_data(
        cls,
        nodes_table: "pa.Table",
        edges_table: "pa.Table",
        augment_tables: bool = True,
        nodes_column_metadata: Union[Dict[str, Dict[str, KiaraModel]], None] = None,
        edges_column_metadata: Union[Dict[str, Dict[str, KiaraModel]], None] = None,
    ) -> "NetworkData":
        """Assemble a NetworkData instance from a nodes table and an edges table.

        This is the factory the other constructors in this class delegate to.

        **Required Table Structure:**

        Nodes table must contain:
        - '_node_id' (int): Unique integer identifier for each node
        - '_label' (str): Human-readable label for the node

        Edges table must contain:
        - '_source' (int): Source node ID (must exist in nodes table)
        - '_target' (int): Target node ID (must exist in nodes table)

        **Automatic Augmentation:**
        With `augment_tables=True` (default) the tables are run, in this order, through
        `augment_edges_table_with_id_and_weights`, `augment_nodes_table_with_connection_counts`
        and `augment_tables_with_component_ids` before anything else happens.
        The computed columns are:

        For edges:
        - '_edge_id': Unique edge identifier
        - '_count_dup_directed': Count of parallel edges (directed interpretation)
        - '_idx_dup_directed': Index within parallel edge group (directed)
        - '_count_dup_undirected': Count of parallel edges (undirected interpretation)
        - '_idx_dup_undirected': Index within parallel edge group (undirected)

        For nodes:
        - '_count_edges': Total edge count (simple graph interpretation)
        - '_count_edges_multi': Total edge count (multi-graph interpretation)
        - '_in_edges': Incoming edge count (directed, simple)
        - '_out_edges': Outgoing edge count (directed, simple)
        - '_in_edges_multi': Incoming edge count (directed, multi)
        - '_out_edges_multi': Outgoing edge count (directed, multi)
        - '_degree_centrality': Normalized degree centrality
        - '_degree_centrality_multi': Normalized degree centrality (multi-graph)

        **Column metadata:**
        Default attribute metadata is registered for the internal columns with
        `overwrite_existing=False`. Metadata passed by the caller is applied
        afterwards with `overwrite_existing=True`.

        Args:
            nodes_table: PyArrow table containing node data
            edges_table: PyArrow table containing edge data
            augment_tables: Whether to compute and add the internal columns.
                Set to False only if they are already present and correct.
            nodes_column_metadata: Additional metadata for nodes table columns.
                Format: {column_name: {property_name: property_value}}
            edges_column_metadata: Additional metadata for edges table columns.
                Format: {column_name: {property_name: property_value}}

        Returns:
            NetworkData: A new NetworkData instance

        Raises:
            KiaraException: If the source or target column of the edges table
                contains null values

        """

        from kiara_plugin.network_analysis.models.metadata import (
            EDGE_COUNT_DUP_DIRECTED_COLUMN_METADATA,
            EDGE_COUNT_DUP_UNDIRECTED_COLUMN_METADATA,
            EDGE_ID_COLUMN_METADATA,
            EDGE_IDX_DUP_DIRECTED_COLUMN_METADATA,
            EDGE_IDX_DUP_UNDIRECTED_COLUMN_METADATA,
            EDGE_SOURCE_COLUMN_METADATA,
            EDGE_TARGET_COLUMN_METADATA,
            NODE_COUND_EDGES_MULTI_COLUMN_METADATA,
            NODE_COUNT_EDGES_COLUMN_METADATA,
            NODE_COUNT_IN_EDGES_COLUMN_METADATA,
            NODE_COUNT_IN_EDGES_MULTI_COLUMN_METADATA,
            NODE_COUNT_OUT_EDGES_COLUMN_METADATA,
            NODE_COUNT_OUT_EDGES_MULTI_COLUMN_METADATA,
            NODE_DEGREE_COLUMN_METADATA,
            NODE_DEGREE_MULTI_COLUMN_METADATA,
            NODE_ID_COLUMN_METADATA,
            NODE_LABEL_COLUMN_METADATA,
        )

        if augment_tables:
            # Order matters: the node augmentation consumes the augmented edges table.
            edges_table = augment_edges_table_with_id_and_weights(edges_table)
            nodes_table = augment_nodes_table_with_connection_counts(
                nodes_table, edges_table
            )
            nodes_table, edges_table = augment_tables_with_component_ids(
                nodes_table=nodes_table, edges_table=edges_table
            )

        # Every edge needs both endpoints; source is checked before target.
        endpoint_checks = (
            (SOURCE_COLUMN_NAME, "Source column in edges table contains null values."),
            (TARGET_COLUMN_NAME, "Target column in edges table contains null values."),
        )
        for endpoint_column, null_details in endpoint_checks:
            if edges_table.column(endpoint_column).null_count > 0:
                raise KiaraException(
                    msg="Can't assemble network data.",
                    details=null_details,
                )

        network_data: NetworkData = cls.create_tables(
            {NODES_TABLE_NAME: nodes_table, EDGES_TABLE_NAME: edges_table}
        )

        # Default attribute metadata for the internal columns, in registration order.
        default_edge_metadata = (
            (EDGE_ID_COLUMN_NAME, EDGE_ID_COLUMN_METADATA),
            (SOURCE_COLUMN_NAME, EDGE_SOURCE_COLUMN_METADATA),
            (TARGET_COLUMN_NAME, EDGE_TARGET_COLUMN_METADATA),
            (COUNT_DIRECTED_COLUMN_NAME, EDGE_COUNT_DUP_DIRECTED_COLUMN_METADATA),
            (COUNT_IDX_DIRECTED_COLUMN_NAME, EDGE_IDX_DUP_DIRECTED_COLUMN_METADATA),
            (COUNT_UNDIRECTED_COLUMN_NAME, EDGE_COUNT_DUP_UNDIRECTED_COLUMN_METADATA),
            (COUNT_IDX_UNDIRECTED_COLUMN_NAME, EDGE_IDX_DUP_UNDIRECTED_COLUMN_METADATA),
        )
        default_node_metadata = (
            (NODE_ID_COLUMN_NAME, NODE_ID_COLUMN_METADATA),
            (LABEL_COLUMN_NAME, NODE_LABEL_COLUMN_METADATA),
            (CONNECTIONS_COLUMN_NAME, NODE_COUNT_EDGES_COLUMN_METADATA),
            (UNWEIGHTED_DEGREE_CENTRALITY_COLUMN_NAME, NODE_DEGREE_COLUMN_METADATA),
            (CONNECTIONS_MULTI_COLUMN_NAME, NODE_COUND_EDGES_MULTI_COLUMN_METADATA),
            (
                UNWEIGHTED_DEGREE_CENTRALITY_MULTI_COLUMN_NAME,
                NODE_DEGREE_MULTI_COLUMN_METADATA,
            ),
            (IN_DIRECTED_COLUMN_NAME, NODE_COUNT_IN_EDGES_COLUMN_METADATA),
            (IN_DIRECTED_MULTI_COLUMN_NAME, NODE_COUNT_IN_EDGES_MULTI_COLUMN_METADATA),
            (OUT_DIRECTED_COLUMN_NAME, NODE_COUNT_OUT_EDGES_COLUMN_METADATA),
            (
                OUT_DIRECTED_MULTI_COLUMN_NAME,
                NODE_COUNT_OUT_EDGES_MULTI_COLUMN_METADATA,
            ),
        )

        # overwrite_existing=False: metadata already attached to a column wins over the default.
        for column_name, default_metadata in default_edge_metadata:
            network_data.edges.set_column_metadata(
                column_name,
                ATTRIBUTE_PROPERTY_KEY,
                default_metadata,
                overwrite_existing=False,
            )
        for column_name, default_metadata in default_node_metadata:
            network_data.nodes.set_column_metadata(
                column_name,
                ATTRIBUTE_PROPERTY_KEY,
                default_metadata,
                overwrite_existing=False,
            )

        # Caller-supplied metadata is applied last and replaces whatever is there.
        if nodes_column_metadata is not None:
            for column_name, properties in nodes_column_metadata.items():
                for property_name, property_value in properties.items():
                    network_data.nodes.set_column_metadata(
                        column_name, property_name, property_value, overwrite_existing=True
                    )
        if edges_column_metadata is not None:
            for column_name, properties in edges_column_metadata.items():
                for property_name, property_value in properties.items():
                    network_data.edges.set_column_metadata(
                        column_name, property_name, property_value, overwrite_existing=True
                    )

        return network_data
415

416
    @classmethod
    def from_filtered_nodes(
        cls, network_data: "NetworkData", nodes_list: List[int]
    ) -> "NetworkData":
        """Create a new, filtered instance of this class using a source network, and a list of node ids to include.

        Nodes whose id is not in the list are removed. An edge is kept only if *both*
        its source and its target are in the list.

        Only the id/label (nodes) and source/target (edges) columns plus the columns
        that are not flagged as computed attributes are copied over. The remaining
        node ids are renumbered to ``0..n-1`` (in the order the filter query returns
        them), the edge endpoints are remapped accordingly, and the computed columns
        are rebuilt by ``create_network_data``.

        NOTE(review): an empty ``nodes_list`` produces an empty ``IN ()`` clause in the
        generated SQL -- confirm how DuckDB handles that before relying on it.

        Arguments:
            network_data: the source network data
            nodes_list: the list of node ids to include in the filtered network data

        Returns:
            NetworkData: a new instance containing only the selected nodes and the
                edges between them
        """

        import duckdb
        import polars as pl

        def _columns_to_copy(required: List[str], column_metadata) -> List[str]:
            """Return ``required`` plus every non-computed column, never repeating a name."""
            selected = list(required)
            for column_name, metadata in column_metadata.items():
                attr_prop = metadata.get(ATTRIBUTE_PROPERTY_KEY, None)  # type: ignore
                is_computed = attr_prop is not None and attr_prop.computed_attribute
                # The required columns can show up here again (if they are not flagged
                # as computed); selecting a column twice would duplicate it in the result.
                if not is_computed and column_name not in selected:
                    selected.append(column_name)
            return selected

        node_columns = _columns_to_copy(
            [NODE_ID_COLUMN_NAME, LABEL_COLUMN_NAME],
            network_data.nodes.column_metadata,
        )
        edge_columns = _columns_to_copy(
            [SOURCE_COLUMN_NAME, TARGET_COLUMN_NAME],
            network_data.edges.column_metadata,
        )

        node_list_str = ", ".join([str(n) for n in nodes_list])

        # The local names 'nodes_table' / 'edges_table' are referenced from the SQL
        # strings below, so they must not be renamed or removed.
        nodes_table = network_data.nodes.arrow_table  # noqa
        nodes_query = f"SELECT {', '.join(node_columns)} FROM nodes_table n WHERE n.{NODE_ID_COLUMN_NAME} IN ({node_list_str})"
        nodes_result = duckdb.sql(nodes_query).pl()

        edges_table = network_data.edges.arrow_table  # noqa
        # Both endpoints must survive the filter. With 'OR', an edge with only one
        # endpoint in the list would be kept, its other endpoint would be remapped to
        # null below, and 'create_network_data' would then reject the edges table.
        edges_query = f"SELECT {', '.join(edge_columns)} FROM edges_table WHERE {SOURCE_COLUMN_NAME} IN ({node_list_str}) AND {TARGET_COLUMN_NAME} IN ({node_list_str})"
        edges_result = duckdb.sql(edges_query).pl()

        # Renumber the surviving nodes consecutively, starting at 0.
        old_node_ids = nodes_result[NODE_ID_COLUMN_NAME].to_list()
        repl_map = dict(zip(old_node_ids, range(len(nodes_result))))

        nodes_result = nodes_result.with_columns(
            pl.col(NODE_ID_COLUMN_NAME).replace_strict(repl_map, default=None)
        )
        edges_result = edges_result.with_columns(
            pl.col(SOURCE_COLUMN_NAME).replace_strict(repl_map, default=None),
            pl.col(TARGET_COLUMN_NAME).replace_strict(repl_map, default=None),
        )

        filtered = NetworkData.create_network_data(
            nodes_table=nodes_result, edges_table=edges_result
        )
        return filtered
475

476
    @classmethod
    def create_from_networkx_graph(
        cls,
        graph: "nx.Graph",
        label_attr_name: Union[str, None] = None,
        ignore_node_attributes: Union[Iterable[str], None] = None,
    ) -> "NetworkData":
        """Convert a NetworkX graph into a NetworkData instance.

        The nodes are extracted with ``extract_networkx_nodes_as_table`` and the
        edges with ``extract_networkx_edges_as_table``, which receives the node id
        map produced by the first step. The two tables are then passed to
        ``create_network_data`` with its default settings, so the internal columns
        are computed.

        **Graph Type Support:**
        - **nx.Graph**: Converted to undirected simple graph representation
        - **nx.DiGraph**: Converted to directed simple graph representation
        - **nx.MultiGraph**: Converted with multi-edge support (undirected)
        - **nx.MultiDiGraph**: Converted with multi-edge support (directed)

        **Attribute Handling:**
        NetworkX node and edge attributes become columns in the resulting tables.
        Names starting with '_' are reserved for internal use.

        Args:
            graph: Any NetworkX graph instance (Graph, DiGraph, MultiGraph, MultiDiGraph)
            label_attr_name: Name of the node attribute to use as the node label.
                If None, the node ID is converted to string and used as label.
            ignore_node_attributes: Node attribute names to exclude from the
                resulting nodes table

        Returns:
            NetworkData: A new NetworkData instance representing the graph

        Raises:
            KiaraException: If node/edge attributes contain names starting with '_'
                (raised by the extraction helpers -- TODO confirm against their
                implementation)

        Note:
            Node IDs of the NetworkX graph are mapped to sequential integers
            starting from 0. The original IDs end up in '_label' when no
            ``label_attr_name`` is given.
        """

        # TODO: should we also index nodes/edges attributes?

        nodes_table, node_id_map = extract_networkx_nodes_as_table(
            graph=graph,
            label_attr_name=label_attr_name,
            ignore_attributes=ignore_node_attributes,
        )
        # The edges refer to nodes through the ids assigned during node extraction.
        edges_table = extract_networkx_edges_as_table(graph, node_id_map)

        return NetworkData.create_network_data(
            nodes_table=nodes_table, edges_table=edges_table
        )
534

535
    @property
    def edges(self) -> "KiaraTable":
        """The edges table of this network (``self.tables[EDGES_TABLE_NAME]``).

        Holds the original edge attributes next to the internal columns:
        - '_edge_id': Unique edge identifier
        - '_source', '_target': Node IDs for edge endpoints
        - '_count_dup_*': Parallel edge counts for different graph interpretations
        - '_idx_dup_*': Indices within parallel edge groups
        - Original edge attributes (without '_' prefix)

        Returns:
            KiaraTable: The edges table
        """
        edges_table: "KiaraTable" = self.tables[EDGES_TABLE_NAME]
        return edges_table
550

551
    @property
    def nodes(self) -> "KiaraTable":
        """The nodes table of this network (``self.tables[NODES_TABLE_NAME]``).

        Holds the original node attributes next to the internal columns:
        - '_node_id': Unique node identifier (sequential integers from 0)
        - '_label': Human-readable node label
        - '_count_edges*': Edge counts for different graph interpretations
        - '_in_edges*', '_out_edges*': Directional edge counts
        - '_degree_centrality*': Normalized degree centrality measures
        - Original node attributes (without '_' prefix)

        Returns:
            KiaraTable: The nodes table
        """
        nodes_table: "KiaraTable" = self.tables[NODES_TABLE_NAME]
        return nodes_table
567

568
    @property
    def num_nodes(self) -> int:
        """Number of nodes, i.e. the row count of the nodes table.

        Returns:
            int: Number of rows in the nodes table
        """
        node_count: int = self.nodes.num_rows  # type: ignore
        return node_count
576

577
    @property
    def num_edges(self) -> int:
        """Number of edges, i.e. the row count of the edges table.

        Note: every edge record counts, so parallel edges between the same
        pair of nodes are each counted.

        Returns:
            int: Number of rows in the edges table
        """
        edge_count: int = self.edges.num_rows  # type: ignore
        return edge_count
588

589
    def query_edges(
        self, sql_query: str, relation_name: str = EDGES_TABLE_NAME
    ) -> "pa.Table":
        """Run a SQL query against the edges table using DuckDB.

        **Available Columns:**
        - '_edge_id': Unique edge identifier
        - '_source', '_target': Node IDs for edge endpoints
        - '_count_dup_directed': Number of parallel edges (directed interpretation)
        - '_idx_dup_directed': Index within parallel edge group (directed)
        - '_count_dup_undirected': Number of parallel edges (undirected interpretation)
        - '_idx_dup_undirected': Index within parallel edge group (undirected)
        - Original edge attributes (names without '_' prefix)

        Args:
            sql_query: SQL query string. Use 'edges' as the table name in your query.
            relation_name: The table name your query uses, if it is not 'edges'.
                Every occurrence of this string in ``sql_query`` is textually
                replaced with the edges table name before the query runs. This is a
                plain string replacement, so it also hits matches inside other
                identifiers or string literals.

        Returns:
            pa.Table: Query results as a PyArrow table

        Example:
            ```python
            # Find edges with high multiplicity
            parallel_edges = network_data.query_edges(
                "SELECT _source, _target, _count_dup_directed FROM edges WHERE _count_dup_directed > 1"
            )

            # Get edge statistics
            stats = network_data.query_edges(
                "SELECT COUNT(*) as total_edges, AVG(_count_dup_directed) as avg_multiplicity FROM edges"
            )
            ```
        """
        import duckdb

        connection = duckdb.connect()
        # Never read from Python code: the query refers to this table by name
        # (presumably resolved by DuckDB against the local variable -- keep the name).
        edges = self.edges.arrow_table  # noqa: F841

        effective_query = sql_query
        if relation_name != EDGES_TABLE_NAME:
            effective_query = sql_query.replace(relation_name, EDGES_TABLE_NAME)

        return connection.execute(effective_query).arrow()
637

638
    def query_nodes(
        self, sql_query: str, relation_name: str = NODES_TABLE_NAME
    ) -> "pa.Table":
        """Run a SQL query against the nodes table using DuckDB.

        **Available Columns:**
        - '_node_id': Unique node identifier
        - '_label': Human-readable node label
        - '_count_edges': Total edge count (simple graph interpretation)
        - '_count_edges_multi': Total edge count (multi-graph interpretation)
        - '_in_edges': Incoming edge count (directed, simple)
        - '_out_edges': Outgoing edge count (directed, simple)
        - '_in_edges_multi': Incoming edge count (directed, multi)
        - '_out_edges_multi': Outgoing edge count (directed, multi)
        - '_degree_centrality': Normalized degree centrality (simple)
        - '_degree_centrality_multi': Normalized degree centrality (multi)
        - Original node attributes (names without '_' prefix)

        Args:
            sql_query: SQL query string. Use 'nodes' as the table name in your query.
            relation_name: The table name your query uses, if it is not 'nodes'.
                Every occurrence of this string in ``sql_query`` is textually
                replaced with the nodes table name before the query runs. This is a
                plain string replacement, so it also hits matches inside other
                identifiers or string literals.

        Returns:
            pa.Table: Query results as a PyArrow table

        Example:
            ```python
            # Find high-degree nodes
            hubs = network_data.query_nodes(
                "SELECT _node_id, _label, _count_edges FROM nodes WHERE _count_edges > 10 ORDER BY _count_edges DESC"
            )

            # Get centrality statistics
            centrality_stats = network_data.query_nodes(
                "SELECT AVG(_degree_centrality) as avg_centrality, MAX(_degree_centrality) as max_centrality FROM nodes"
            )
            ```
        """
        import duckdb

        connection = duckdb.connect()
        # Never read from Python code: the query refers to this table by name
        # (presumably resolved by DuckDB against the local variable -- keep the name).
        nodes = self.nodes.arrow_table  # noqa

        effective_query = sql_query
        if relation_name != NODES_TABLE_NAME:
            effective_query = sql_query.replace(relation_name, NODES_TABLE_NAME)

        return connection.execute(effective_query).arrow()
690

691
    def _calculate_node_attributes(
        self, incl_node_attributes: Union[bool, str, Iterable[str]]
    ) -> List[str]:
        """Calculate the node attributes that should be included in the output.

        Args:
            incl_node_attributes: which node columns to include:
                - ``False``: only the node id and label columns
                - ``True``: the node id column followed by all other node columns
                - a string: the node id column plus that single column
                - an iterable of strings: the node id column plus those columns,
                  in the order given

        Returns:
            List[str]: the column names to include; the node id column always comes first

        Raises:
            KiaraException: if a requested column does not exist in the nodes table
        """

        if incl_node_attributes is False:
            return [NODE_ID_COLUMN_NAME, LABEL_COLUMN_NAME]

        all_node_attr_names: List[str] = self.nodes.column_names  # type: ignore

        if incl_node_attributes is True:
            return [
                NODE_ID_COLUMN_NAME,
                *(x for x in all_node_attr_names if x != NODE_ID_COLUMN_NAME),
            ]

        # A bare string is one attribute name, not an iterable of characters.
        if isinstance(incl_node_attributes, str):
            requested: List[str] = [incl_node_attributes]
        else:
            requested = list(incl_node_attributes)

        node_attr_names: List[str] = [NODE_ID_COLUMN_NAME]
        for attr_name in requested:
            if attr_name not in all_node_attr_names:
                # Name the attribute that is actually missing, not the whole request.
                raise KiaraException(
                    f"Can't include node attribute {attr_name}: not part of the available attributes ({', '.join(all_node_attr_names)})."
                )
            node_attr_names.append(attr_name)

        return node_attr_names
721

722
    def _calculate_edge_attributes(
        self, incl_edge_attributes: Union[bool, str, Iterable[str]]
    ) -> List[str]:
        """Resolve the edge table columns that should be part of an export.

        The source and target columns are always the first two entries of the result.

        Args:
            incl_edge_attributes: Selects the columns to include:
                - False: only the source and target columns
                - True: every column of the edges table (source and target first)
                - str: source and target plus the named column
                - Iterable[str]: source and target plus the named columns, in the
                  order they are specified

        Returns:
            The ordered list of column names, without duplicates.

        Raises:
            KiaraException: If a requested column is not part of the edges table.
        """

        if incl_edge_attributes is False:
            return [SOURCE_COLUMN_NAME, TARGET_COLUMN_NAME]

        all_edge_attr_names: List[str] = self.edges.column_names  # type: ignore

        requested: Iterable[str]
        if incl_edge_attributes is True:
            requested = all_edge_attr_names
        elif isinstance(incl_edge_attributes, str):
            requested = [incl_edge_attributes]
        else:
            requested = incl_edge_attributes  # type: ignore

        edge_attr_names: List[str] = [SOURCE_COLUMN_NAME, TARGET_COLUMN_NAME]
        for attr_name in requested:
            if attr_name not in all_edge_attr_names:
                # report the offending attribute, not the whole argument
                raise KiaraException(
                    f"Can't include edge attribute {attr_name}: not part of the available attributes ({', '.join(all_edge_attr_names)})."
                )
            # skip names that are already selected (e.g. explicitly requested
            # source/target columns), so the result never contains a column twice
            if attr_name not in edge_attr_names:
                edge_attr_names.append(attr_name)

        return edge_attr_names
760

761
    def retrieve_graph_data(
        self,
        nodes_callback: Union[NodesCallback, None] = None,
        edges_callback: Union[EdgesCallback, None] = None,
        incl_node_attributes: Union[bool, str, Iterable[str]] = False,
        incl_edge_attributes: Union[bool, str, Iterable[str]] = False,
        omit_self_loops: bool = False,
    ):
        """Feed the nodes and edges of this network, row by row, to the given callbacks.

        All nodes are handled before any edge. If a different order is needed,
        call this method twice and pass `None` for the callback that should be
        skipped in each pass.

        Both callbacks are invoked once per table row, with the selected columns
        passed as keyword arguments named after the columns.

        Args:
            nodes_callback: Invoked for every node row, or None to skip the nodes.
                The columns are picked by `_calculate_node_attributes` and
                always start with the node id.
            edges_callback: Invoked for every edge row, or None to skip the edges.
                The columns are picked by `_calculate_edge_attributes` and
                always start with the source and target node ids.
            incl_node_attributes: Which node columns to pass along (False, True,
                a single column name, or an iterable of column names).
            incl_edge_attributes: Which edge columns to pass along (False, True,
                a single column name, or an iterable of column names).
            omit_self_loops: If True, edge rows whose source equals their target
                are not passed to `edges_callback`.
        """

        if nodes_callback is not None:
            node_columns = self._calculate_node_attributes(incl_node_attributes)
            node_rows = (
                self.nodes.to_polars_dataframe()
                .select(*node_columns)
                .rows(named=True)
            )
            for node_row in node_rows:
                nodes_callback(**node_row)  # type: ignore

        if edges_callback is None:
            return

        edge_columns = self._calculate_edge_attributes(incl_edge_attributes)
        edge_rows = (
            self.edges.to_polars_dataframe().select(*edge_columns).rows(named=True)
        )
        for edge_row in edge_rows:
            # forward the row unless self-loops are filtered and this is one
            if (
                not omit_self_loops
                or edge_row[SOURCE_COLUMN_NAME] != edge_row[TARGET_COLUMN_NAME]
            ):
                edges_callback(**edge_row)  # type: ignore
808

809
    def as_networkx_graph(
        self,
        graph_type: Type[NETWORKX_GRAPH_TYPE],
        incl_node_attributes: Union[bool, str, Iterable[str]] = False,
        incl_edge_attributes: Union[bool, str, Iterable[str]] = False,
        omit_self_loops: bool = False,
    ) -> NETWORKX_GRAPH_TYPE:
        """Build a NetworkX graph of the requested class from this network data.

        An empty `graph_type()` instance is created and then populated via
        `retrieve_graph_data`: first all nodes, then all edges.

        **Supported Graph Types:**
        - **nx.Graph**: Undirected simple graph (parallel edges are merged)
        - **nx.DiGraph**: Directed simple graph (parallel edges are merged)
        - **nx.MultiGraph**: Undirected multigraph (parallel edges preserved)
        - **nx.MultiDiGraph**: Directed multigraph (parallel edges preserved)

        Args:
            graph_type: NetworkX graph class to instantiate (nx.Graph, nx.DiGraph, etc.)
            incl_node_attributes: Controls which node columns become node attributes:
                - False: the default selection of `_calculate_node_attributes`
                  (node id, used as the node key, plus the label column)
                - True: all columns of the nodes table
                - str: a single column name
                - Iterable[str]: several column names
            incl_edge_attributes: Controls which edge columns become edge attributes:
                - False: no attributes
                - True: all columns of the edges table
                - str: a single column name
                - Iterable[str]: several column names
            omit_self_loops: If True, edges where source equals target are excluded

        Returns:
            NETWORKX_GRAPH_TYPE: the populated graph instance

        Note:
            Simple graph types (Graph, DiGraph) keep a single edge per node pair,
            so parallel edge rows end up merged. Use MultiGraph or MultiDiGraph
            to keep every edge row.
        """

        nx_graph: NETWORKX_GRAPH_TYPE = graph_type()

        # The callbacks receive the table columns as keyword arguments, so the
        # parameter names below have to match the id/source/target column names.
        def _register_node(_node_id: int, **node_attrs):
            nx_graph.add_node(_node_id, **node_attrs)

        def _register_edge(_source: int, _target: int, **edge_attrs):
            nx_graph.add_edge(_source, _target, **edge_attrs)

        self.retrieve_graph_data(
            nodes_callback=_register_node,
            edges_callback=_register_edge,
            incl_node_attributes=incl_node_attributes,
            incl_edge_attributes=incl_edge_attributes,
            omit_self_loops=omit_self_loops,
        )
        return nx_graph
873

874
    def as_rustworkx_graph(
        self,
        graph_type: Type[RUSTWORKX_GRAPH_TYPE],
        multigraph: bool = False,
        incl_node_attributes: Union[bool, str, Iterable[str]] = False,
        incl_edge_attributes: Union[bool, str, Iterable[str]] = False,
        omit_self_loops: bool = False,
        attach_node_id_map: bool = False,
    ) -> RUSTWORKX_GRAPH_TYPE:
        """Export the network data as a RustWorkX graph object.

        **Supported Graph Types:**
        - **rx.PyGraph**: Undirected graph (with optional multigraph support)
        - **rx.PyDiGraph**: Directed graph (with optional multigraph support)

        **Node ID Mapping:**
        RustWorkX assigns its own integer index to every node that is added,
        which may differ from the original NetworkData node id. The original
        '_node_id' value is stored in each node's data dictionary, and edges are
        attached by translating their source/target ids into RustWorkX indexes.
        The mapping between the two id spaces can optionally be attached to the
        returned graph.

        Args:
            graph_type: RustWorkX graph class (rx.PyGraph or rx.PyDiGraph)
            multigraph: Passed on to the graph constructor as `multigraph`
            incl_node_attributes: Controls which node columns end up in the node
                data dictionary:
                - False: the default selection of `_calculate_node_attributes`
                  (node id and label column)
                - True: all columns of the nodes table
                - str: a single column name
                - Iterable[str]: several column names
            incl_edge_attributes: Controls which edge columns end up in the edge
                payload (a dict, or None if no column besides source/target is
                selected):
                - False: no attributes
                - True: all columns of the edges table
                - str: a single column name
                - Iterable[str]: several column names
            omit_self_loops: If True, self-loops (edges where source == target) are excluded
            attach_node_id_map: If True, sets `graph.attrs` to a dict whose
                'node_id_map' entry is a bidict mapping RustWorkX node indexes
                to original NetworkData node ids

        Returns:
            RUSTWORKX_GRAPH_TYPE: RustWorkX graph instance of the specified type

        Raises:
            KiaraException: If a requested node or edge attribute does not exist.

        Note:
            The original NetworkData '_node_id' values are always included in the
            node data dictionary, regardless of the incl_node_attributes setting.
        """

        from bidict import bidict

        graph = graph_type(multigraph=multigraph)

        # The attribute selections are consumed twice (validation here, column
        # selection in 'retrieve_graph_data'), so one-shot iterables have to be
        # materialized first.
        if not isinstance(incl_node_attributes, (bool, str)):
            incl_node_attributes = list(incl_node_attributes)
        if not isinstance(incl_edge_attributes, (bool, str)):
            incl_edge_attributes = list(incl_edge_attributes)

        # fail early on unknown attribute names, before any graph data is added
        self._calculate_node_attributes(incl_node_attributes)
        self._calculate_edge_attributes(incl_edge_attributes)

        # rustworkx node index -> original node id
        # we can use a 'global' dict here because we know the nodes are processed before the edges
        node_map: bidict = bidict()

        def add_node(_node_id: int, **attrs):
            data = {NODE_ID_COLUMN_NAME: _node_id}
            data.update(attrs)

            graph_node_id = graph.add_node(data)
            node_map[graph_node_id] = _node_id

        def add_edge(_source: int, _target: int, **attrs):
            # edges reference original node ids, so they need to be translated
            # into rustworkx indexes via the inverse direction of the map
            source = node_map.inverse[_source]
            target = node_map.inverse[_target]
            graph.add_edge(source, target, attrs if attrs else None)

        self.retrieve_graph_data(
            nodes_callback=add_node,
            edges_callback=add_edge,
            incl_node_attributes=incl_node_attributes,
            incl_edge_attributes=incl_edge_attributes,
            omit_self_loops=omit_self_loops,
        )

        if attach_node_id_map:
            graph.attrs = {"node_id_map": node_map}  # type: ignore

        return graph
974

975
    @property
    def component_ids(self) -> Set[int]:
        """The distinct values of the component id column of the nodes table."""

        import duckdb

        # 'nodes_table' is referenced by name from within the SQL query below
        nodes_table = self.nodes.arrow_table  # noqa
        query = f"""
        SELECT DISTINCT {COMPONENT_ID_COLUMN_NAME} FROM nodes_table
        """

        # every result row is a 1-tuple; a set comprehension is needed here, a
        # parenthesized generator inside braces would yield a set that contains
        # the generator object itself
        result: Set[int] = {row[0] for row in duckdb.sql(query).fetchall()}
        return result
986

987

988
class GraphProperties(BaseModel):
    """Edge statistics of network data when read as one specific graph type."""

    number_of_edges: int = Field(description="The number of edges.")
    parallel_edges: int = Field(
        default=0,
        description="The number of parallel edges (if 'multi' graph type).",
    )
995

996

997
class ComponentProperties(BaseModel):
    """Size information about a single connected component."""

    component_id: int = Field(
        description="The id of the component.",
    )
    number_of_nodes: int = Field(
        description="The number of nodes in the component.",
    )
    number_of_associated_edge_rows: int = Field(
        description="The number of edge rows associated to the component.",
    )
1005

1006

1007
class NetworkGraphProperties(ValueMetadata):
    """Summary statistics for a value of type 'network_data'.

    Holds the node count, edge counts per graph type interpretation, the number
    of self-loops, and per-component node/edge-row counts.
    """

    _metadata_key: ClassVar[str] = "network_data"

    number_of_nodes: int = Field(description="Number of nodes in the network graph.")
    properties_by_graph_type: Dict[  # type: ignore
        Literal[
            GraphType.DIRECTED.value,
            GraphType.UNDIRECTED.value,
            GraphType.UNDIRECTED_MULTI.value,
            GraphType.DIRECTED_MULTI.value,
        ],
        GraphProperties,
    ] = Field(description="Properties of the network data, by graph type.")
    number_of_self_loops: int = Field(
        description="Number of edges where source and target point to the same node."
    )
    number_of_components: int = Field(
        description="Number of connected components in the network graph."
    )
    components: Dict[int, ComponentProperties] = Field(
        description="Properties of the components of the network graph."
    )

    @classmethod
    def retrieve_supported_data_types(cls) -> Iterable[str]:
        """Return the names of the data types this metadata can be created for."""
        return ["network_data"]

    @classmethod
    def create_value_metadata(cls, value: Value) -> "NetworkGraphProperties":
        """Compute the network statistics for the network data held by `value`.

        Args:
            value: the value whose `data` attribute is the network data to inspect

        Returns:
            The populated metadata instance; its `components` mapping is ordered
            by ascending component id.
        """
        import duckdb

        network_data: NetworkData = value.data

        def _single_count(edges_query: str):
            # run a query that yields exactly one cell and unwrap it
            table = network_data.query_edges(edges_query)
            return table.columns[0][0].as_py()

        node_total = network_data.num_nodes
        edge_row_total = network_data.num_edges

        # simple graph interpretations: count the edge rows with index 1
        directed_count = _single_count(
            f"SELECT COUNT(*) FROM {EDGES_TABLE_NAME} WHERE {COUNT_IDX_DIRECTED_COLUMN_NAME} = 1"
        )
        undirected_count = _single_count(
            f"SELECT COUNT(*) FROM {EDGES_TABLE_NAME} WHERE {COUNT_IDX_UNDIRECTED_COLUMN_NAME} = 1"
        )

        self_loop_count = _single_count(
            f"SELECT count(*) FROM {EDGES_TABLE_NAME} WHERE {SOURCE_COLUMN_NAME} = {TARGET_COLUMN_NAME}"
        )

        # multi graph interpretations: count the edge rows with index 2
        parallel_directed_count = _single_count(
            f"SELECT COUNT(*) FROM {EDGES_TABLE_NAME} WHERE {COUNT_IDX_DIRECTED_COLUMN_NAME} = 2"
        )
        parallel_undirected_count = _single_count(
            f"SELECT COUNT(*) FROM {EDGES_TABLE_NAME} WHERE {COUNT_IDX_UNDIRECTED_COLUMN_NAME} = 2"
        )

        props = {
            GraphType.DIRECTED.value: GraphProperties(number_of_edges=directed_count),
            GraphType.DIRECTED_MULTI.value: GraphProperties(
                number_of_edges=edge_row_total,
                parallel_edges=parallel_directed_count,
            ),
            GraphType.UNDIRECTED.value: GraphProperties(
                number_of_edges=undirected_count
            ),
            GraphType.UNDIRECTED_MULTI.value: GraphProperties(
                number_of_edges=edge_row_total,
                parallel_edges=parallel_undirected_count,
            ),
        }

        # both tables are referenced by their local variable name in the SQL
        # below, so they have to stay locals of this very function
        nodes_table = network_data.nodes.arrow_table  # noqa
        edges_table = network_data.edges.arrow_table  # noqa

        node_rows = duckdb.query(
            f"""
            SELECT
                {COMPONENT_ID_COLUMN_NAME}, COUNT(*)
            FROM
                nodes_table
            GROUP BY {COMPONENT_ID_COLUMN_NAME}
        """
        ).fetchall()
        nodes_per_component = {row[0]: row[1] for row in node_rows}

        edge_rows = duckdb.query(
            f"""
            SELECT
                {COMPONENT_ID_COLUMN_NAME}, COUNT(*)
            FROM
                edges_table
            GROUP BY {COMPONENT_ID_COLUMN_NAME}
        """
        ).fetchall()
        edge_rows_per_component = {row[0]: row[1] for row in edge_rows}

        # components without any edge row get an edge row count of 0
        components_by_id = {
            comp_id: ComponentProperties(
                component_id=comp_id,
                number_of_nodes=node_count,
                number_of_associated_edge_rows=edge_rows_per_component.get(comp_id, 0),
            )
            for comp_id, node_count in nodes_per_component.items()
        }

        return cls(
            number_of_nodes=node_total,
            properties_by_graph_type=props,
            number_of_self_loops=self_loop_count,
            number_of_components=len(components_by_id),
            components={
                comp_id: components_by_id[comp_id]
                for comp_id in sorted(components_by_id)
            },
        )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc