• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

graphty-org / graphty-element / 20514590651

26 Dec 2025 02:37AM UTC coverage: 70.559% (-0.3%) from 70.836%
20514590651

push

github

apowers313
ci: fix npm ci

9591 of 13363 branches covered (71.77%)

Branch coverage included in aggregate %.

25136 of 35854 relevant lines covered (70.11%)

6233.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.93
/src/data/DataSource.ts
1
import {z} from "zod/v4";
15✔
2
import * as z4 from "zod/v4/core";
15✔
3

4
import {AdHocData} from "../config";
5
import {ErrorAggregator} from "./ErrorAggregator.js";
15✔
6

7
/**
 * Base configuration shared by data source implementations.
 *
 * `DataSource.getContent()` checks the three input fields in the order
 * `data`, `file`, `url` and uses the first one that is present.
 */
export interface BaseDataSourceConfig {
    /** Inline content. Returned unchanged whenever it is defined, including the empty string. */
    data?: string;
    /** File whose text is read when `data` is undefined. */
    file?: File;
    /** Location fetched (with retry and timeout) when neither `data` nor `file` is supplied. */
    url?: string;
    /**
     * NOTE(review): not read by the base class from this config; presumably
     * forwarded by subclasses to the `DataSource` constructor as the number of
     * nodes per chunk - confirm against the subclasses.
     */
    chunkSize?: number;
    /**
     * NOTE(review): not read by the base class from this config; presumably
     * forwarded by subclasses to the `DataSource` constructor as the maximum
     * number of recorded errors - confirm against the subclasses.
     */
    errorLimit?: number;
}
15

16
/** Constructor signature a class must satisfy to be stored in the registry. */
type DataSourceClass = new (opts: object) => DataSource;

/** Registered data source constructors, keyed by each class's static `type` string. */
const dataSourceRegistry = new Map<string, DataSourceClass>();
15✔
18
/**
 * One batch of graph data produced by a data source.
 * Either array may be empty.
 */
export interface DataSourceChunk {
    /** Node records contained in this batch. */
    nodes: AdHocData[];
    /** Edge records contained in this batch. */
    edges: AdHocData[];
}
22

23
/**
 * Base class for all data source implementations that load graph data from various formats.
 * Provides common functionality for validation, chunking, error handling, and data fetching.
 *
 * A concrete subclass implements `sourceFetchData()` to produce raw chunks and
 * `getConfig()` so the shared helpers can locate the input. Its static `type`
 * is the key used by `register()` and `get()`.
 */
export abstract class DataSource {
    /** Type identifier of the class; read by the `type` getter and by `register()`. */
    static readonly type: string;
    /** Chunk size used when none is passed to the constructor. */
    static readonly DEFAULT_CHUNK_SIZE = 1000;

    /** Schema applied to each edge in `getData()`; `null` disables edge validation. */
    edgeSchema: z4.$ZodObject | null = null;
    /** Schema applied to each node in `getData()`; `null` disables node validation. */
    nodeSchema: z4.$ZodObject | null = null;
    /** Collects validation failures and reports when the error limit is reached. */
    protected errorAggregator: ErrorAggregator;
    /** Requested number of nodes per chunk, as passed to the constructor. */
    protected chunkSize: number;

    /**
     * Creates a new DataSource instance.
     * @param errorLimit - Maximum number of errors before stopping data processing
     * @param chunkSize - Number of nodes to process per chunk
     */
    constructor(errorLimit = 100, chunkSize = DataSource.DEFAULT_CHUNK_SIZE) {
        this.errorAggregator = new ErrorAggregator(errorLimit);
        this.chunkSize = chunkSize;
    }

    // abstract init(): Promise<void>;

    /**
     * Produces the raw, unvalidated chunks of the source.
     * `getData()` consumes this generator and applies schema validation.
     */
    abstract sourceFetchData(): AsyncGenerator<DataSourceChunk, void, unknown>;

    /**
     * Subclasses must implement this to expose their config
     * Used by getContent() and other shared methods
     */
    protected abstract getConfig(): BaseDataSourceConfig;

    /**
     * Standardized error message templates
     * @returns Object containing error message template functions
     */
    protected get errorMessages(): {
        missingInput: () => string;
        fetchFailed: (url: string, attempts: number, error: string) => string;
        parseFailed: (error: string) => string;
        invalidFormat: (reason: string) => string;
        extractionFailed: (path: string, error: string) => string;
    } {
        return {
            missingInput: () =>
                `${this.type}DataSource requires data, file, or url`,

            fetchFailed: (url: string, attempts: number, error: string) =>
                `Failed to fetch ${this.type} from ${url} after ${attempts} attempts: ${error}`,

            parseFailed: (error: string) =>
                `Failed to parse ${this.type}: ${error}`,

            invalidFormat: (reason: string) =>
                `Invalid ${this.type} format: ${reason}`,

            extractionFailed: (path: string, error: string) =>
                `Failed to extract data using path '${path}': ${error}`,
        };
    }

    /**
     * Fetch with retry logic and timeout
     * Protected method for use by all DataSources
     *
     * `data:` URLs are fetched once, without timeout or retry. For any other
     * URL at least one attempt is always made, even if `retries` is zero,
     * negative or NaN. Network errors, timeouts and non-OK HTTP statuses are
     * all retried, waiting 1s, 2s, 4s, ... between attempts.
     * @param url - URL to fetch from
     * @param retries - Maximum number of attempts (values below 1 are treated as 1)
     * @param timeout - Timeout in milliseconds for each attempt
     * @returns Promise resolving to the fetch Response
     * @throws Error describing the last failure once every attempt has failed
     */
    protected async fetchWithRetry(
        url: string,
        retries = 3,
        timeout = 30000,
    ): Promise<Response> {
        // Data URLs don't need retries or timeouts
        if (url.startsWith("data:")) {
            return await fetch(url);
        }

        // Guarantee one attempt; rounding up keeps the last-attempt check exact
        // for fractional values so the real failure reason is always reported.
        const maxAttempts = retries >= 1 ? Math.ceil(retries) : 1;

        for (let attempt = 0; attempt < maxAttempts; attempt++) {
            try {
                // Abort the request if it has not responded within `timeout` ms
                const controller = new AbortController();
                const timeoutId = setTimeout(() => {
                    controller.abort();
                }, timeout);

                let response: Response;
                try {
                    response = await fetch(url, {signal: controller.signal});
                } finally {
                    // The timer must not outlive the request, whatever its outcome
                    clearTimeout(timeoutId);
                }

                if (!response.ok) {
                    throw new Error(`HTTP error! status: ${response.status}`);
                }

                return response;
            } catch (error) {
                // Report an abort caused by our own timer as a timeout
                const failure = error instanceof Error && error.name === "AbortError" ?
                    new Error(`Request timeout after ${timeout}ms`) :
                    error;

                if (attempt === maxAttempts - 1) {
                    const errorMsg = failure instanceof Error ? failure.message : String(failure);
                    throw new Error(
                        `Failed to fetch from ${url} after ${maxAttempts} attempts: ${errorMsg}`,
                    );
                }

                // Exponential backoff: wait 1s, 2s, 4s...
                const delay = Math.pow(2, attempt) * 1000;
                await new Promise((resolve) => setTimeout(resolve, delay));
            }
        }

        // Only reachable to satisfy the return type; the loop returns or throws
        throw new Error("Unexpected error in fetchWithRetry");
    }

    /**
     * Shared method to get content from data, file, or URL
     * Subclasses should call this instead of implementing their own
     *
     * Precedence is `data` (any defined value, including the empty string),
     * then `file`, then `url`.
     * @returns Promise resolving to the content string
     * @throws Error when the config provides none of data, file, or url
     */
    protected async getContent(): Promise<string> {
        const config = this.getConfig();

        if (config.data !== undefined) {
            return config.data;
        }

        if (config.file) {
            return await config.file.text();
        }

        if (config.url) {
            const response = await this.fetchWithRetry(config.url);
            return await response.text();
        }

        throw new Error(this.errorMessages.missingInput());
    }

    /**
     * Shared chunking helper
     * Yields nodes in chunks, with all edges in the first chunk
     *
     * A chunk size that is not at least 1 (zero, negative, NaN) falls back to
     * `DEFAULT_CHUNK_SIZE` instead of looping forever or dropping nodes.
     * @param nodes - Array of node data objects
     * @param edges - Array of edge data objects
     * @yields DataSourceChunk objects containing chunked nodes and edges
     */
    protected *chunkData(
        nodes: AdHocData[],
        edges: AdHocData[],
    ): Generator<DataSourceChunk, void, unknown> {
        const requested = this.chunkSize;
        const step = requested >= 1 ? Math.floor(requested) : DataSource.DEFAULT_CHUNK_SIZE;

        // Yield nodes in chunks
        for (let i = 0; i < nodes.length; i += step) {
            const nodeChunk = nodes.slice(i, i + step);
            const edgeChunk = i === 0 ? edges : [];
            yield {nodes: nodeChunk, edges: edgeChunk};
        }

        // If no nodes but edges exist, yield edges-only chunk
        if (nodes.length === 0 && edges.length > 0) {
            yield {nodes: [], edges};
        }
    }

    /**
     * Get the error aggregator for this data source
     * @returns The ErrorAggregator instance tracking validation errors
     */
    getErrorAggregator(): ErrorAggregator {
        return this.errorAggregator;
    }

    /**
     * Fetches, validates, and yields graph data in chunks.
     * Filters out invalid nodes and edges based on schema validation.
     *
     * Chunks left empty by filtering are not yielded. The error limit is
     * checked after each chunk, so the chunk that reaches the limit is still
     * validated in full before iteration stops.
     * @yields DataSourceChunk objects containing validated nodes and edges
     */
    async *getData(): AsyncGenerator<DataSourceChunk, void, unknown> {
        // Keeps the items that pass `schema`; without a schema everything is kept.
        // Failures are recorded by dataValidator() and the item is skipped.
        const keepValid = async (
            schema: z4.$ZodObject | null,
            items: AdHocData[],
        ): Promise<AdHocData[]> => {
            if (!schema) {
                // Copy via slice: push(...items) can overflow the argument limit on huge arrays
                return items.slice();
            }

            const valid: AdHocData[] = [];
            for (const item of items) {
                if (await this.dataValidator(schema, item)) {
                    valid.push(item);
                }
            }
            return valid;
        };

        for await (const chunk of this.sourceFetchData()) {
            const validNodes = await keepValid(this.nodeSchema, chunk.nodes);
            const validEdges = await keepValid(this.edgeSchema, chunk.edges);

            // Only yield if we have data (or if we're not filtering)
            if (validNodes.length > 0 || validEdges.length > 0) {
                yield {nodes: validNodes, edges: validEdges};
            }

            // Stop if we've hit the error limit
            if (this.errorAggregator.hasReachedLimit()) {
                break;
            }
        }
    }

    /**
     * Validate data against schema
     * Returns false if validation fails (and adds error to aggregator)
     * Returns true if validation succeeds
     * @param schema - Zod schema to validate against
     * @param obj - Data object to validate
     * @returns Promise resolving to true if validation succeeds, false otherwise
     */
    async dataValidator(schema: z4.$ZodObject, obj: object): Promise<boolean> {
        const res = await z4.safeParseAsync(schema, obj);

        if (!res.success) {
            const errMsg = z.prettifyError(res.error);

            this.errorAggregator.addError({
                message: `Validation failed: ${errMsg}`,
                category: "validation-error",
            });

            return false; // Validation failed
        }

        return true; // Validation passed
    }

    /**
     * Gets the type identifier for this data source instance.
     * @returns The static `type` of the instance's own class
     */
    get type(): string {
        return (this.constructor as typeof DataSource).type;
    }

    /**
     * Registers a data source class with the registry.
     * The class is stored under its static `type`, replacing any class
     * previously registered under the same key.
     * @param cls - The data source class to register
     * @returns The registered class for chaining
     */
    static register<T extends DataSourceClass>(cls: T): T {
        // DataSourceClass only describes the constructor, so the static `type` is read untyped
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const t: string = (cls as any).type;
        dataSourceRegistry.set(t, cls);
        return cls;
    }

    /**
     * Creates a data source instance by type name.
     * @param type - The registered type identifier
     * @param opts - Configuration options for the data source
     * @returns A new data source instance or null if type not found
     */
    static get(type: string, opts: object = {}): DataSource | null {
        const SourceClass = dataSourceRegistry.get(type);
        return SourceClass ? new SourceClass(opts) : null;
    }

    /**
     * Get all registered data source types.
     * @returns Array of registered data source type names, sorted
     * @since 1.5.0
     * @example
     * ```typescript
     * const types = DataSource.getRegisteredTypes();
     * console.log('Available data sources:', types);
     * // ['csv', 'gexf', 'gml', 'graphml', 'json', 'pajek']
     * ```
     */
    static getRegisteredTypes(): string[] {
        return Array.from(dataSourceRegistry.keys()).sort();
    }
}
34✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc