• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

graphty-org / graphty-monorepo / 20661584252

02 Jan 2026 03:50PM UTC coverage: 77.924% (+7.3%) from 70.62%
20661584252

push

github

apowers313
ci: fix flakey performance test

13438 of 17822 branches covered (75.4%)

Branch coverage included in aggregate %.

41247 of 52355 relevant lines covered (78.78%)

145534.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.07
/graphty-element/src/data/DataSource.ts
1
import { z } from "zod/v4";
15✔
2
import * as z4 from "zod/v4/core";
15✔
3

4
import { AdHocData } from "../config";
5
import { ErrorAggregator } from "./ErrorAggregator.js";
15✔
6

7
/**
 * Base configuration shared by data source implementations.
 *
 * The three input fields are alternatives. `DataSource.getContent()` checks
 * them in the order `data`, `file`, `url` and uses the first one that is set.
 */
export interface BaseDataSourceConfig {
    /** Inline content; when defined it is returned as-is by `getContent()`. */
    data?: string;
    /** File whose text is read with `File.text()` when `data` is not defined. */
    file?: File;
    /** URL fetched (with retry and timeout) when neither `data` nor `file` is set. */
    url?: string;
    /** Chunk size override — presumably forwarded to the DataSource constructor by subclasses; confirm against callers. */
    chunkSize?: number;
    /** Error limit override — presumably forwarded to the DataSource constructor by subclasses; confirm against callers. */
    errorLimit?: number;
}
15

16
/** Constructor signature every registrable data source class must satisfy. */
type DataSourceClass = new (opts: object) => DataSource;

/** Module-level registry mapping a data source type identifier to its class. */
const dataSourceRegistry: Map<string, DataSourceClass> = new Map();
15✔
18
/**
 * One batch of graph data produced by a data source.
 */
export interface DataSourceChunk {
    /** Node records contained in this batch. */
    nodes: AdHocData[];
    /** Edge records contained in this batch. */
    edges: AdHocData[];
}
22

23
/**
 * Base class for all data source implementations that load graph data from various formats.
 * Provides common functionality for validation, chunking, error handling, and data fetching.
 *
 * Concrete subclasses implement `sourceFetchData()` and `getConfig()`, and are
 * looked up by their static `type` once passed to `DataSource.register()`.
 */
export abstract class DataSource {
    /** Type identifier of the data source class; used as the registry key by `register()`. */
    static readonly type: string;
    /** Chunk size used when the constructor is not given one (or is given an unusable one). */
    static readonly DEFAULT_CHUNK_SIZE = 1000;

    /** Optional schema for edges; when null, edges pass through `getData()` unvalidated. */
    edgeSchema: z4.$ZodObject | null = null;
    /** Optional schema for nodes; when null, nodes pass through `getData()` unvalidated. */
    nodeSchema: z4.$ZodObject | null = null;
    protected errorAggregator: ErrorAggregator;
    protected chunkSize: number;

    /**
     * Creates a new DataSource instance.
     * @param errorLimit - Maximum number of errors before stopping data processing
     * @param chunkSize - Number of nodes to process per chunk
     */
    constructor(errorLimit = 100, chunkSize = DataSource.DEFAULT_CHUNK_SIZE) {
        this.errorAggregator = new ErrorAggregator(errorLimit);
        this.chunkSize = chunkSize;
    }

    // abstract init(): Promise<void>;
    abstract sourceFetchData(): AsyncGenerator<DataSourceChunk, void, unknown>;

    /**
     * Subclasses must implement this to expose their config
     * Used by getContent() and other shared methods
     */
    protected abstract getConfig(): BaseDataSourceConfig;

    /**
     * Standardized error message templates
     * @returns Object containing error message template functions
     */
    protected get errorMessages(): {
        missingInput: () => string;
        fetchFailed: (url: string, attempts: number, error: string) => string;
        parseFailed: (error: string) => string;
        invalidFormat: (reason: string) => string;
        extractionFailed: (path: string, error: string) => string;
    } {
        return {
            missingInput: () => `${this.type}DataSource requires data, file, or url`,

            fetchFailed: (url: string, attempts: number, error: string) =>
                `Failed to fetch ${this.type} from ${url} after ${attempts} attempts: ${error}`,

            parseFailed: (error: string) => `Failed to parse ${this.type}: ${error}`,

            invalidFormat: (reason: string) => `Invalid ${this.type} format: ${reason}`,

            extractionFailed: (path: string, error: string) => `Failed to extract data using path '${path}': ${error}`,
        };
    }

    /**
     * Fetch with retry logic and timeout
     * Protected method for use by all DataSources
     *
     * At least one attempt is always made: a `retries` value below 1 (or NaN)
     * is treated as 1, and a fractional value is rounded up.
     * @param url - URL to fetch from
     * @param retries - Number of attempts to make before giving up
     * @param timeout - Timeout in milliseconds for each attempt
     * @returns Promise resolving to the fetch Response
     * @throws Error when every attempt fails (network error, non-OK status, or timeout)
     */
    protected async fetchWithRetry(url: string, retries = 3, timeout = 30000): Promise<Response> {
        // Data URLs don't need retries or timeouts
        if (url.startsWith("data:")) {
            return await fetch(url);
        }

        // Guard against 0 / negative / NaN / fractional counts, which would
        // otherwise skip the loop entirely or never hit the last-attempt branch.
        const maxAttempts = retries >= 1 ? Math.ceil(retries) : 1;

        for (let attempt = 0; attempt < maxAttempts; attempt++) {
            try {
                // Abort the request if it has not produced a response within `timeout` ms
                const controller = new AbortController();
                const timeoutId = setTimeout(() => {
                    controller.abort();
                }, timeout);

                try {
                    const response = await fetch(url, { signal: controller.signal });

                    if (!response.ok) {
                        throw new Error(`HTTP error! status: ${response.status}`);
                    }

                    return response;
                } catch (error) {
                    // Translate the abort triggered by our own timer into a readable message
                    if (error instanceof Error && error.name === "AbortError") {
                        throw new Error(`Request timeout after ${timeout}ms`);
                    }

                    throw error;
                } finally {
                    // Runs on success and failure, before any backoff delay starts
                    clearTimeout(timeoutId);
                }
            } catch (error) {
                const isLastAttempt = attempt === maxAttempts - 1;

                if (isLastAttempt) {
                    const errorMsg = error instanceof Error ? error.message : String(error);
                    throw new Error(`Failed to fetch from ${url} after ${maxAttempts} attempts: ${errorMsg}`);
                }

                // Exponential backoff: wait 1s, 2s, 4s...
                const delay = Math.pow(2, attempt) * 1000;
                await new Promise((resolve) => setTimeout(resolve, delay));
            }
        }

        // Should never reach here
        throw new Error("Unexpected error in fetchWithRetry");
    }

    /**
     * Shared method to get content from data, file, or URL
     * Subclasses should call this instead of implementing their own
     *
     * Sources are checked in the order `data`, `file`, `url`; the first one set wins.
     * @returns Promise resolving to the content string
     * @throws Error when none of data, file, or url is provided
     */
    protected async getContent(): Promise<string> {
        const config = this.getConfig();

        // An empty string is still valid inline data, hence the explicit undefined check
        if (config.data !== undefined) {
            return config.data;
        }

        if (config.file) {
            return await config.file.text();
        }

        if (config.url) {
            const response = await this.fetchWithRetry(config.url);
            return await response.text();
        }

        throw new Error(this.errorMessages.missingInput());
    }

    /**
     * Shared chunking helper
     * Yields nodes in chunks, with all edges in the first chunk
     *
     * If `chunkSize` is not at least 1 (zero, negative, or NaN), the default
     * chunk size is used instead, so the generator always terminates and never
     * drops nodes.
     * @param nodes - Array of node data objects
     * @param edges - Array of edge data objects
     * @yields DataSourceChunk objects containing chunked nodes and edges
     */
    protected *chunkData(nodes: AdHocData[], edges: AdHocData[]): Generator<DataSourceChunk, void, unknown> {
        const step = this.chunkSize >= 1 ? this.chunkSize : DataSource.DEFAULT_CHUNK_SIZE;

        // Yield nodes in chunks; edges ride along with the first chunk only
        for (let start = 0; start < nodes.length; start += step) {
            yield {
                nodes: nodes.slice(start, start + step),
                edges: start === 0 ? edges : [],
            };
        }

        // If no nodes but edges exist, yield edges-only chunk
        if (nodes.length === 0 && edges.length > 0) {
            yield { nodes: [], edges };
        }
    }

    /**
     * Get the error aggregator for this data source
     * @returns The ErrorAggregator instance tracking validation errors
     */
    getErrorAggregator(): ErrorAggregator {
        return this.errorAggregator;
    }

    /**
     * Fetches, validates, and yields graph data in chunks.
     * Filters out invalid nodes and edges based on schema validation.
     * Chunks left with no valid nodes and no valid edges are not yielded, and
     * iteration stops after the chunk in which the error limit is reached.
     * @yields DataSourceChunk objects containing validated nodes and edges
     */
    async *getData(): AsyncGenerator<DataSourceChunk, void, unknown> {
        // Returns the items that pass `schema`, or a copy of all items when there
        // is no schema. The copy uses an array-literal spread rather than
        // push(...items), which fails on very large chunks (argument-count limit).
        const keepValid = async (schema: z4.$ZodObject | null, items: AdHocData[]): Promise<AdHocData[]> => {
            if (!schema) {
                return [...items];
            }

            const valid: AdHocData[] = [];
            for (const item of items) {
                // Invalid items are recorded in errorAggregator by dataValidator and skipped
                if (await this.dataValidator(schema, item)) {
                    valid.push(item);
                }
            }

            return valid;
        };

        for await (const chunk of this.sourceFetchData()) {
            const validNodes = await keepValid(this.nodeSchema, chunk.nodes);
            const validEdges = await keepValid(this.edgeSchema, chunk.edges);

            // Only yield if we have data (or if we're not filtering)
            if (validNodes.length > 0 || validEdges.length > 0) {
                yield { nodes: validNodes, edges: validEdges };
            }

            // Stop if we've hit the error limit
            if (this.errorAggregator.hasReachedLimit()) {
                break;
            }
        }
    }

    /**
     * Validate data against schema
     * Returns false if validation fails (and adds error to aggregator)
     * Returns true if validation succeeds
     * @param schema - Zod schema to validate against
     * @param obj - Data object to validate
     * @returns Promise resolving to true if validation succeeds, false otherwise
     */
    async dataValidator(schema: z4.$ZodObject, obj: object): Promise<boolean> {
        const res = await z4.safeParseAsync(schema, obj);

        if (res.success) {
            return true; // Validation passed
        }

        this.errorAggregator.addError({
            message: `Validation failed: ${z.prettifyError(res.error)}`,
            category: "validation-error",
        });

        return false; // Validation failed
    }

    /**
     * Gets the type identifier for this data source instance.
     * @returns The type string identifier
     */
    get type(): string {
        // Read the static `type` of whichever concrete class built this instance
        return (this.constructor as typeof DataSource).type;
    }

    /**
     * Registers a data source class with the registry.
     * The class is stored under its static `type`; a later registration with the
     * same type replaces the earlier one.
     * @param cls - The data source class to register
     * @returns The registered class for chaining
     */
    static register<T extends DataSourceClass>(cls: T): T {
        // DataSourceClass only describes the constructor signature, so widen the
        // view of `cls` just enough to read its static `type`.
        const { type } = cls as T & { readonly type: string };
        dataSourceRegistry.set(type, cls);
        return cls;
    }

    /**
     * Creates a data source instance by type name.
     * @param type - The registered type identifier
     * @param opts - Configuration options for the data source
     * @returns A new data source instance or null if type not found
     */
    static get(type: string, opts: object = {}): DataSource | null {
        const SourceClass = dataSourceRegistry.get(type);
        return SourceClass ? new SourceClass(opts) : null;
    }

    /**
     * Get all registered data source types.
     * @returns Array of registered data source type names
     * @since 1.5.0
     * @example
     * ```typescript
     * const types = DataSource.getRegisteredTypes();
     * console.log('Available data sources:', types);
     * // ['csv', 'gexf', 'gml', 'graphml', 'json', 'pajek']
     * ```
     */
    static getRegisteredTypes(): string[] {
        // Sorting a fresh copy leaves the registry's own iteration order untouched
        return [...dataSourceRegistry.keys()].sort();
    }
}
34✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc