• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moconnell / yolo / 19116598559

05 Nov 2025 09:12PM UTC coverage: 35.004% (-23.2%) from 58.251%
19116598559

push

github

web-flow
Merge pull request #37 from moconnell/36-feat-support-pure-unravel-factor-trading

Feat: support pure Unravel trading

107 of 364 branches covered (29.4%)

Branch coverage included in aggregate %.

30 of 368 new or added lines in 9 files covered. (8.15%)

1 existing line in 1 file now uncovered.

284 of 753 relevant lines covered (37.72%)

15.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/YoloAbstractions/FactorDataFrame.cs
1
using System;
2
using System.Collections.Generic;
3
using System.Linq;
4
using MathNet.Numerics.LinearAlgebra;
5
using Microsoft.Data.Analysis;
6
using static YoloAbstractions.FactorType;
7

8
namespace YoloAbstractions;
9

10
public sealed record FactorDataFrame
11
{
12
    private readonly DataFrame _dataFrame;
13
    private readonly Dictionary<string, int> _tickerIndex;
14

NEW
15
    /// <summary>
    /// Wraps an existing <see cref="DataFrame"/> and builds a case-insensitive
    /// ticker-to-row-index lookup from its "Ticker" column.
    /// </summary>
    /// <param name="dataFrame">Frame containing a string column named "Ticker".</param>
    /// <param name="factorTypes">Factor types this frame exposes.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="dataFrame"/> or <paramref name="factorTypes"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// Two tickers are equal under <see cref="StringComparer.OrdinalIgnoreCase"/>
    /// (raised while the lookup dictionary is populated).
    /// </exception>
    public FactorDataFrame(DataFrame dataFrame, params FactorType[] factorTypes)
    {
        ArgumentNullException.ThrowIfNull(dataFrame);
        ArgumentNullException.ThrowIfNull(factorTypes);

        _dataFrame = dataFrame;
        FactorTypes = factorTypes;

        var tickerCol = (StringDataFrameColumn) dataFrame["Ticker"];

        // Use the indexed Select overload instead of mutating a captured counter:
        // the row number then comes from the enumeration itself and stays correct
        // even if the sequence were ever enumerated more than once.
        var kvps = tickerCol.Select((ticker, row) => KeyValuePair.Create(ticker, row));
        _tickerIndex = new Dictionary<string, int>(kvps, StringComparer.OrdinalIgnoreCase);
    }
×
24

NEW
25
    /// <summary>
    /// Factor types exposed by this frame. Assigned by the constructor; the
    /// <c>init</c> accessor also allows it to be set in an object initializer
    /// or <c>with</c> expression.
    /// </summary>
    public IReadOnlyList<FactorType> FactorTypes { get; init; }
×
26

NEW
27
    /// <summary>
    /// Values of the "Ticker" column in row order. Every access copies the
    /// column into a new array, so cache the result when reading it repeatedly.
    /// </summary>
    public IReadOnlyList<string> Tickers => ((StringDataFrameColumn) _dataFrame["Ticker"]).ToArray();
×
28
    
NEW
29
    /// <summary>
    /// True when the frame has no rows or exposes no factor types.
    /// </summary>
    /// <remarks>
    /// The "Ticker" column has exactly one entry per row, so a separate
    /// ticker-count check would only repeat the row-count check while copying
    /// the whole column into an array.
    /// </remarks>
    public bool IsEmpty => _dataFrame.Rows.Count == 0 || FactorTypes.Count == 0;
×
30

NEW
31
    /// <summary>
    /// Shared instance built from an empty ticker list, no factor columns and
    /// <see cref="DateTime.MinValue"/> as the timestamp.
    /// </summary>
    public static readonly FactorDataFrame Empty = NewFrom([], DateTime.MinValue);
×
32

33
    /// <summary>
    /// Builds a frame with one row per ticker: a "Date" column filled with
    /// <paramref name="timestamp"/>, a "Ticker" column, and one double column
    /// per supplied factor (named after the factor type).
    /// </summary>
    /// <param name="tickers">Row labels; must be unique ignoring case.</param>
    /// <param name="timestamp">Value repeated in every row of the "Date" column.</param>
    /// <param name="values">
    /// One entry per factor. Each value list must have exactly one element per
    /// ticker, in ticker order, and each factor type may appear only once.
    /// </param>
    /// <returns>A new <see cref="FactorDataFrame"/> over the constructed frame.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tickers"/> or <paramref name="values"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// Tickers or factor types are duplicated, a value list is null, or a value
    /// list's length differs from the ticker count.
    /// </exception>
    public static FactorDataFrame NewFrom(
        IReadOnlyList<string> tickers,
        DateTime timestamp,
        params (FactorType FactorType, IReadOnlyList<double> Values)[] values)
    {
        ArgumentNullException.ThrowIfNull(tickers);
        ArgumentNullException.ThrowIfNull(values);

        // Same comparer as the ticker lookup built by the constructor, so both
        // agree on what counts as a duplicate (and no upper-cased copies are made).
        if (tickers.Distinct(StringComparer.OrdinalIgnoreCase).Count() != tickers.Count)
            throw new ArgumentException("Duplicate tickers.", nameof(tickers));

        if (values.Select(v => v.FactorType).Distinct().Count() != values.Length)
            throw new ArgumentException("Duplicate factor types.", nameof(values));

        foreach (var (factorType, factorValues) in values)
        {
            if (factorValues is null)
                throw new ArgumentException($"Values for {factorType} must not be null.", nameof(values));

            if (factorValues.Count != tickers.Count)
                throw new ArgumentException(
                    $"Length mismatch for {factorType}: expected {tickers.Count}, got {factorValues.Count}.",
                    nameof(values));
        }

        var df = new DataFrame(
        [
            new PrimitiveDataFrameColumn<DateTime>("Date", Enumerable.Repeat(timestamp, tickers.Count)),
            new StringDataFrameColumn("Ticker", tickers),
            ..values.Select(tuple => new DoubleDataFrameColumn(tuple.FactorType.ToString(), tuple.Values))
        ]);
        var factorTypes = values.Select(tuple => tuple.FactorType).ToArray();

        return new FactorDataFrame(df, factorTypes);
    }
×
61

62
    /// <summary>
    /// Reads a single factor value for a ticker.
    /// </summary>
    /// <param name="factorType">Factor to read.</param>
    /// <param name="ticker">Ticker symbol; looked up case-insensitively.</param>
    /// <returns>
    /// The stored value, or <see cref="double.NaN"/> when the factor type is not
    /// in <see cref="FactorTypes"/>, the ticker is unknown, or the cell is null.
    /// </returns>
    public double this[FactorType factorType, string ticker]
    {
        get
        {
            if (FactorTypes.Contains(factorType) && _tickerIndex.TryGetValue(ticker, out var row))
            {
                var cell = (double?) _dataFrame[factorType.ToString()][row];
                return cell.GetValueOrDefault(double.NaN);
            }

            return double.NaN;
        }
    }
75

76
    /// <summary>
    /// Returns every factor value stored for one ticker.
    /// </summary>
    /// <param name="ticker">Ticker symbol; looked up case-insensitively.</param>
    /// <returns>
    /// A map from each entry of <see cref="FactorTypes"/> to its value in the
    /// ticker's row; null cells are reported as <see cref="double.NaN"/>.
    /// </returns>
    /// <exception cref="KeyNotFoundException">The ticker is not present.</exception>
    public IReadOnlyDictionary<FactorType, double> this[string ticker]
    {
        get
        {
            if (!_tickerIndex.TryGetValue(ticker, out var rowIndex))
                throw new KeyNotFoundException($"{ticker} does not exist");

            // Distinct() keeps the first occurrence of a repeated factor type; a
            // repeat would read the same cell again, so the resulting map is the same.
            return FactorTypes
                .Distinct()
                .ToDictionary(
                    factorType => factorType,
                    factorType => ((DoubleDataFrameColumn) _dataFrame[factorType.ToString()])[rowIndex] ?? double.NaN);
        }
    }
97

98
    /// <summary>
    /// Combines two frames that describe the same tickers but disjoint factor
    /// columns into one frame holding the columns of both.
    /// </summary>
    /// <param name="one">Left operand; its row order and its "Date"/"Ticker" columns are kept.</param>
    /// <param name="two">Right operand; its extra columns are re-ordered to match <paramref name="one"/>.</param>
    /// <returns>A new frame with the union of both operands' columns and factor types.</returns>
    /// <exception cref="ArgumentNullException">Either operand is null.</exception>
    /// <exception cref="ArgumentException">
    /// The ticker sets differ (ignoring case), or both frames contain a column
    /// with the same name other than "Ticker" and "Date".
    /// </exception>
    public static FactorDataFrame operator +(FactorDataFrame one, FactorDataFrame two)
    {
        ArgumentNullException.ThrowIfNull(one);
        ArgumentNullException.ThrowIfNull(two);

        // Tickers copies the column on every access, so read each side once.
        var oneTickers = one.Tickers;
        var twoTickers = two.Tickers;

        if (oneTickers.Count != twoTickers.Count ||
            oneTickers.Except(twoTickers, StringComparer.OrdinalIgnoreCase).Any())
            throw new ArgumentException("Ticker sets must match.");

        var sharedCols = one._dataFrame.Columns
            .Select(c => c.Name)
            .Intersect(two._dataFrame.Columns.Select(c => c.Name))
            .Where(name => !string.Equals(name, "Ticker", StringComparison.Ordinal) &&
                           !string.Equals(name, "Date", StringComparison.Ordinal))
            .ToArray();
        if (sharedCols.Length != 0)
            throw new ArgumentException(
                $"Cannot merge DataFrames with overlapping Factor columns: {string.Join(", ", sharedCols)}");

        // The check above only proves the ticker SETS match. Rows may still be in
        // a different order, so map every row of `one` to the row of `two` that
        // carries the same ticker before attaching two's columns.
        var rowMap = oneTickers.Select(ticker => (long) two._tickerIndex[ticker]).ToArray();
        var alreadyAligned = rowMap.Select((source, target) => source == target).All(same => same);

        // Columns that only `two` contributes (same selection UnionBy-by-name made).
        var columnsFromTwo = two._dataFrame.Columns
            .ExceptBy(one._dataFrame.Columns.Select(c => c.Name), c => c.Name);

        if (!alreadyAligned)
        {
            // Clone with a map-indices column yields a copy whose row i holds the
            // source value at rowMap[i]; the column name is preserved.
            var mapIndices = new PrimitiveDataFrameColumn<long>("RowMap", rowMap);
            columnsFromTwo = columnsFromTwo.Select(c => c.Clone(mapIndices));
        }

        var joinedColumns = one._dataFrame.Columns
            .Concat(columnsFromTwo)
            .ToArray();
        var df = new DataFrame(joinedColumns);

        // Union already removes duplicates.
        var joinedFactorTypes = one.FactorTypes.Union(two.FactorTypes).ToArray();

        return new FactorDataFrame(df, joinedFactorTypes);
    }
×
125

126
    /// <summary>
    /// Produces a frame whose factor columns are rescaled across tickers with the
    /// chosen method. The Volatility column, when present as a double column, is
    /// carried over unchanged and placed after the factor columns.
    /// </summary>
    /// <param name="method">
    /// Scaling to apply. <see cref="NormalizationMethod.None"/> returns this
    /// instance as-is.
    /// </param>
    /// <returns>This instance for <c>None</c>; otherwise a new frame with the same factor types.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="method"/> is not a known value (detected while processing
    /// the first non-volatility factor column).
    /// </exception>
    public FactorDataFrame Normalize(NormalizationMethod method = NormalizationMethod.CrossSectionalZScore)
    {
        if (method == NormalizationMethod.None)
            return this;

        var output = new List<DataFrameColumn> { _dataFrame["Date"], _dataFrame["Ticker"] };

        foreach (var factorType in FactorTypes.Except([Volatility]))
        {
            var name = factorType.ToString();
            var source = (DoubleDataFrameColumn) _dataFrame[name];

            IEnumerable<double> scaled;
            switch (method)
            {
                case NormalizationMethod.CrossSectionalZScore:
                    scaled = NormalizeZScore(source);
                    break;
                case NormalizationMethod.MinMax:
                    scaled = NormalizeMinMax(source);
                    break;
                case NormalizationMethod.Rank:
                    scaled = NormalizeRank(source);
                    break;
                default:
                    throw new ArgumentOutOfRangeException(
                        nameof(method),
                        method,
                        $"Unknown normalization method: {method}");
            }

            output.Add(new DoubleDataFrameColumn(name, scaled));
        }

        // Volatility is a scaling input, not a signal: pass it through untouched.
        var volatility = _dataFrame.Columns.FirstOrDefault(c => c.Name == nameof(Volatility));
        if (volatility is DoubleDataFrameColumn)
            output.Add(volatility);

        return new FactorDataFrame(new DataFrame(output), [..FactorTypes]);
    }
×
165

166
    /// <summary>
    /// Rescales a column to z-scores using the mean and population standard
    /// deviation (divisor = number of present values) of its present entries.
    /// </summary>
    /// <param name="col">Column to rescale; null and NaN entries count as missing.</param>
    /// <returns>
    /// One value per row, lazily evaluated. Missing entries always stay
    /// <see cref="double.NaN"/>. When the present values are (almost) constant,
    /// every present entry maps to 0.
    /// </returns>
    private static IEnumerable<double> NormalizeZScore(DoubleDataFrameColumn col)
    {
        static bool IsPresent(double? v) => v.HasValue && !double.IsNaN(v.Value);

        var values = col.Where(IsPresent).Select(v => v!.Value).ToArray();

        if (values.Length == 0)
            return col.Select(_ => double.NaN);

        var mean = values.Average();
        var variance = values.Sum(v => Math.Pow(v - mean, 2)) / values.Length;
        var stdDev = Math.Sqrt(variance);

        // Constant column: avoid dividing by ~0. Missing entries must remain NaN
        // here too, otherwise absent data would turn into a real, neutral score.
        if (stdDev < 1e-10)
            return col.Select(v => IsPresent(v) ? 0.0 : double.NaN);

        return col.Select(v => IsPresent(v)
            ? (v!.Value - mean) / stdDev
            : double.NaN);
    }
×
184

185
    /// <summary>
    /// Linearly rescales a column so its smallest present value maps to -1 and
    /// its largest to +1.
    /// </summary>
    /// <param name="col">Column to rescale; null and NaN entries count as missing.</param>
    /// <returns>
    /// One value per row, lazily evaluated. Missing entries always stay
    /// <see cref="double.NaN"/>. When the present values span (almost) no range,
    /// every present entry maps to 0.
    /// </returns>
    private static IEnumerable<double> NormalizeMinMax(DoubleDataFrameColumn col)
    {
        static bool IsPresent(double? v) => v.HasValue && !double.IsNaN(v.Value);

        var values = col.Where(IsPresent).Select(v => v!.Value).ToArray();

        if (values.Length == 0)
            return col.Select(_ => double.NaN);

        var min = values.Min();
        var max = values.Max();
        var range = max - min;

        // Zero range: avoid dividing by ~0. Missing entries must remain NaN here
        // too, otherwise absent data would turn into a real, neutral score.
        if (range < 1e-10)
            return col.Select(v => IsPresent(v) ? 0.0 : double.NaN);

        return col.Select(v => IsPresent(v)
            ? 2 * ((v!.Value - min) / range) - 1 // Scale to [-1, 1]
            : double.NaN);
    }
×
203

204
    /// <summary>
    /// Replaces each present value with its ascending rank, scaled linearly so
    /// the lowest rank is -1 and the highest is +1.
    /// </summary>
    /// <param name="col">Column to rank; null and NaN entries count as missing.</param>
    /// <returns>
    /// One value per row, lazily evaluated. Missing entries are
    /// <see cref="double.NaN"/>. Equal values share the average of the ranks they
    /// occupy, so the result does not depend on row order; a single present
    /// value (or an all-equal column) maps to 0.
    /// </returns>
    private static IEnumerable<double> NormalizeRank(DoubleDataFrameColumn col)
    {
        var values = col.Select((v, i) => (Value: v, Index: i)).ToArray();
        var validValues = values
            .Where(x => x.Value.HasValue && !double.IsNaN(x.Value.Value))
            .Select(x => (Value: x.Value!.Value, x.Index))
            .OrderBy(x => x.Value)
            .ToArray();

        if (validValues.Length == 0)
            return col.Select(_ => double.NaN);

        var ranks = new Dictionary<int, double>(validValues.Length);
        var start = 0;
        while (start < validValues.Length)
        {
            // Extend [start, end] over the run of values equal to validValues[start].
            var end = start;
            while (end + 1 < validValues.Length && validValues[end + 1].Value == validValues[start].Value)
                end++;

            // Ties share the mean ordinal position of their run; without ties
            // (start == end) this is simply the element's own position.
            var position = (start + end) / 2.0;

            // Scale to [-1, 1] range
            var scaled = validValues.Length > 1
                ? 2.0 * position / (validValues.Length - 1) - 1
                : 0.0;

            for (var k = start; k <= end; k++)
                ranks[validValues[k].Index] = scaled;

            start = end + 1;
        }

        return values.Select(x => ranks.TryGetValue(x.Index, out var rank) ? rank : double.NaN);
    }
×
226

227
    /// <summary>
    /// Collapses the factor columns into one weight per ticker: a weighted sum of
    /// the row's factor values divided by the sum of absolute factor weights,
    /// optionally divided by volatility and clamped.
    /// </summary>
    /// <param name="weights">
    /// Weight per factor type. Double columns whose name is not a factor type
    /// present in this map contribute with weight 0.
    /// </param>
    /// <param name="maxWeightAbs">
    /// When set, each resulting weight is clamped to
    /// [-<paramref name="maxWeightAbs"/>, +<paramref name="maxWeightAbs"/>].
    /// </param>
    /// <param name="volatilityScaling">
    /// When true and a double Volatility column exists, each weight is divided by
    /// the row's volatility; non-positive or missing volatilities divide by 1.
    /// </param>
    /// <param name="normalizePerAsset">
    /// When true and at least one factor value is missing (null or NaN), each row
    /// is normalized only by the weights of the factors present in that row.
    /// Otherwise missing values count as 0 and every row is normalized by the
    /// sum of all absolute weights.
    /// </param>
    /// <returns>A new frame with the "Date", "Ticker" and "Weight" columns.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="weights"/> is null.</exception>
    public DataFrame ApplyWeights(
        IReadOnlyDictionary<FactorType, double> weights,
        double? maxWeightAbs = null,
        bool volatilityScaling = true,
        bool normalizePerAsset = true)
    {
        ArgumentNullException.ThrowIfNull(weights);

        // Select factor columns by name rather than by position: the wrapped
        // frame is not guaranteed to have Date/Ticker as its first two columns,
        // and skipping by position could silently drop a real factor column.
        var factorCols = _dataFrame.Columns
            .Where(c => !string.Equals(c.Name, "Date", StringComparison.Ordinal) &&
                        !string.Equals(c.Name, "Ticker", StringComparison.Ordinal) &&
                        !string.Equals(c.Name, nameof(Volatility), StringComparison.Ordinal))
            .OfType<DoubleDataFrameColumn>()
            .OrderBy(c => c.Name, StringComparer.Ordinal)
            .ToArray();
        var alignedWeights = factorCols
            .Select(c => Enum.TryParse<FactorType>(c.Name, out var ft) && weights.TryGetValue(ft, out var w) ? w : 0d)
            .ToArray();

        var rows = (int) _dataFrame.Rows.Count;
        var columns = factorCols.Length;

        var tickerWeightsVector = GetWeights();

        if (volatilityScaling &&
            _dataFrame.Columns.FirstOrDefault(c => c.Name == nameof(Volatility)) is DoubleDataFrameColumn volCol)
        {
            // Null, NaN and non-positive volatilities fail the `> 0` pattern and
            // fall back to 1, i.e. leave the weight unscaled.
            var vol = Vector<double>.Build.DenseOfArray(
                volCol.Select(x => x is > 0d ? x.Value : 1d).ToArray());
            tickerWeightsVector = tickerWeightsVector.PointwiseDivide(vol);
        }

        if (maxWeightAbs.HasValue)
        {
            tickerWeightsVector.MapInplace(x => Math.Clamp(x, -maxWeightAbs.Value, maxWeightAbs.Value));
        }

        var resultDf = new DataFrame();
        resultDf.Columns.Add(_dataFrame["Date"]);
        resultDf.Columns.Add(_dataFrame["Ticker"]);
        resultDf.Columns.Add(new DoubleDataFrameColumn("Weight", tickerWeightsVector));

        return resultDf;

        // Computes the un-scaled, un-clamped weight for every row.
        Vector<double> GetWeights()
        {
            if (normalizePerAsset && HasMissingValues())
            {
                // Calculate weight per asset, normalizing only by available factors
                var tickerWeights = new double[rows];
                for (var row = 0; row < rows; row++)
                {
                    double weightSum = 0;
                    double normalizerSum = 0;

                    for (var col = 0; col < columns; col++)
                    {
                        var value = factorCols[col][row];
                        if (value.HasValue && !double.IsNaN(value.Value))
                        {
                            weightSum += value.Value * alignedWeights[col];
                            normalizerSum += Math.Abs(alignedWeights[col]);
                        }
                    }

                    tickerWeights[row] = normalizerSum > 0 ? weightSum / normalizerSum : 0;
                }

                return Vector<double>.Build.DenseOfArray(tickerWeights);
            }
            else
            {
                // Treat NaN exactly like null (both mean "missing"): a NaN left in
                // the matrix would make the whole row's weight NaN.
                var data = factorCols
                    .Select(c => c.Select(x => x.HasValue && !double.IsNaN(x.Value) ? x.Value : 0d).ToArray())
                    .ToArray();

                var m = Matrix<double>.Build.DenseOfColumns(rows, columns, data);
                var v = Vector<double>.Build.DenseOfArray(alignedWeights);

                var normalizer = alignedWeights.Sum(Math.Abs);
                if (normalizer <= 0)
                    normalizer = 1;
                var tickerWeights = m * v / normalizer; // (rows x 1)

                return tickerWeights;
            }
        }

        // True when any factor cell is null or NaN.
        bool HasMissingValues()
        {
            return factorCols.Any(c => c.Any(value => value is null or double.NaN));
        }
    }
×
321

322
    /// <summary>
    /// Renders the frame as a right-aligned text table: a header row, a dashed
    /// separator, one line per row prefixed with its zero-based index, and a
    /// trailing "[N rows x M columns]" summary.
    /// </summary>
    /// <returns>The table text, or "Empty FactorDataFrame" when there are no rows.</returns>
    public override string ToString()
    {
        if (_dataFrame.Rows.Count == 0)
            return "Empty FactorDataFrame";

        var columns = _dataFrame.Columns.ToList();
        var rowCount = (int) _dataFrame.Rows.Count;

        // Format every cell once (cells[column][row]); the same text is used for
        // both measuring the column width and printing.
        var cells = columns
            .Select(c => Enumerable.Range(0, rowCount).Select(r => FormatValue(c[r])).ToArray())
            .ToArray();

        // A column is as wide as its header or its longest cell, whichever is larger.
        var widths = columns
            .Select((c, i) => cells[i].Aggregate(c.Name.Length, (widest, text) => Math.Max(widest, text.Length)))
            .ToArray();

        // Room for the row index plus two characters of padding.
        var indexWidth = rowCount.ToString().Length + 2;

        var sb = new System.Text.StringBuilder();

        // Header row
        sb.Append(' ', indexWidth);
        for (var c = 0; c < columns.Count; c++)
            sb.Append(columns[c].Name.PadLeft(widths[c] + 2));
        sb.AppendLine();

        // Separator line
        sb.Append(' ', indexWidth);
        foreach (var width in widths)
            sb.Append('-', width + 2);
        sb.AppendLine();

        // Data rows
        for (var r = 0; r < rowCount; r++)
        {
            sb.Append(r.ToString().PadLeft(indexWidth));
            for (var c = 0; c < columns.Count; c++)
                sb.Append(cells[c][r].PadLeft(widths[c] + 2));
            sb.AppendLine();
        }

        sb.AppendLine();
        sb.AppendLine($"[{rowCount} rows x {columns.Count} columns]");

        return sb.ToString();
    }
×
387

388
    /// <summary>
    /// Converts a single cell to its display text for the table output.
    /// </summary>
    /// <param name="value">Boxed cell value; may be null.</param>
    /// <returns>
    /// "NaN" for null or NaN; doubles with six decimals; dates as
    /// <c>yyyy-MM-dd</c> (plus a 24-hour <c>HH:mm:ss</c> when the time of day is
    /// not midnight); strings unchanged; anything else via <c>ToString()</c>.
    /// Numbers and dates are formatted with the invariant culture so the output
    /// does not vary with the machine's regional settings.
    /// </returns>
    private static string FormatValue(object? value)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        return value switch
        {
            null => "NaN",
            double.NaN => "NaN",
            double d => d.ToString("F6", invariant),
            DateOnly date => date.ToString("yyyy-MM-dd", invariant),
            DateTime { Hour: 0, Minute: 0, Second: 0, Millisecond: 0, Microsecond: 0 } dt =>
                dt.ToString("yyyy-MM-dd", invariant),
            // "HH" = 24-hour clock. "hh" is the 12-hour clock and, without an
            // AM/PM designator, prints 21:12 and 09:12 identically.
            DateTime dt => dt.ToString("yyyy-MM-dd HH:mm:ss", invariant),
            string s => s,
            _ => value.ToString() ?? "null"
        };
    }
×
402
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc