• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moconnell / yolo / 21807100357

08 Feb 2026 11:08PM UTC coverage: 85.297% (-0.02%) from 85.313%
21807100357

push

github

moconnell
test: DataFrameExtensionsTest

264 of 279 branches covered (94.62%)

Branch coverage included in aggregate %.

1807 of 2149 relevant lines covered (84.09%)

37.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.33
/src/YoloAbstractions/FactorDataFrame.cs
1
using MathNet.Numerics.LinearAlgebra;
2
using Microsoft.Data.Analysis;
3
using YoloAbstractions.Extensions;
4
using static YoloAbstractions.FactorType;
5

6
namespace YoloAbstractions;
7

8
public sealed record FactorDataFrame
9
{
10
    private readonly DataFrame _dataFrame;
11
    private readonly Dictionary<string, int> _tickerIndex;
12

13
    /// <summary>
    /// Wraps <paramref name="dataFrame"/> and builds a case-insensitive lookup from each
    /// entry of its "Ticker" column to that entry's row position.
    /// </summary>
    /// <param name="dataFrame">Frame to wrap; its "Ticker" column is cast to <see cref="StringDataFrameColumn"/>.</param>
    /// <param name="factorTypes">Factor types exposed through <see cref="FactorTypes"/>.</param>
    public FactorDataFrame(DataFrame dataFrame, params FactorType[] factorTypes)
    {
        _dataFrame = dataFrame;
        FactorTypes = factorTypes;

        var tickers = (StringDataFrameColumn)dataFrame["Ticker"];

        // The indexed Select overload supplies the row position directly.
        var rowByTicker = tickers.Select((ticker, row) => KeyValuePair.Create(ticker, row));

        _tickerIndex = new Dictionary<string, int>(rowByTicker, StringComparer.OrdinalIgnoreCase);
    }
145✔
22

23
    /// <summary>Factor types carried by this frame, as supplied at construction.</summary>
    public IReadOnlyList<FactorType> FactorTypes { get; init; }

    /// <summary>
    /// The "Ticker" column in row order. A new array is materialized on every access.
    /// </summary>
    public IReadOnlyList<string> Tickers => ((StringDataFrameColumn)_dataFrame["Ticker"]).ToArray();

    /// <summary>True when the frame has no rows or carries no factor types.</summary>
    // A separate "no tickers" test is not needed: it would copy the whole ticker column only to
    // re-check the row count (assumes the Ticker column is as long as the frame has rows, which
    // DataFrame is expected to enforce for all of its columns).
    public bool IsEmpty => _dataFrame.Rows.Count == 0 || FactorTypes.Count == 0;

    /// <summary>Shared instance with no tickers and no factor columns.</summary>
    public static readonly FactorDataFrame Empty = NewFrom([], DateTime.MinValue);
3✔
30

31
    /// <summary>
    /// Builds a frame with a "Date" column (every row set to <paramref name="timestamp"/>),
    /// a "Ticker" column, and one double column per entry of <paramref name="values"/>,
    /// named after its factor type.
    /// </summary>
    /// <param name="tickers">Ticker symbols, one per row; must be unique ignoring case.</param>
    /// <param name="timestamp">Value written to every row of the "Date" column.</param>
    /// <param name="values">Factor type / value list pairs; each list must have one value per ticker.</param>
    /// <returns>A new <see cref="FactorDataFrame"/> over the constructed columns.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tickers"/> or <paramref name="values"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// Tickers or factor types are duplicated, or a value list length differs from the ticker count.
    /// </exception>
    public static FactorDataFrame NewFrom(
        IReadOnlyList<string> tickers,
        DateTime timestamp,
        params (FactorType FactorType, IReadOnlyList<double> Values)[] values)
    {
        ArgumentNullException.ThrowIfNull(tickers);
        ArgumentNullException.ThrowIfNull(values);

        // Detect duplicates with the same comparer the constructor uses for its ticker index,
        // so both always agree on what counts as "the same ticker".
        if (tickers.Distinct(StringComparer.OrdinalIgnoreCase).Count() != tickers.Count)
            throw new ArgumentException("Duplicate tickers.", nameof(tickers));

        if (values.Select(v => v.FactorType).Distinct().Count() != values.Length)
            throw new ArgumentException("Duplicate factor types.", nameof(values));

        foreach (var (factorType, factorValues) in values)
        {
            if (factorValues.Count != tickers.Count)
                throw new ArgumentException(
                    $"Length mismatch for {factorType}: expected {tickers.Count}, got {factorValues.Count}.",
                    nameof(values));
        }

        var df = new DataFrame(
        [
            new PrimitiveDataFrameColumn<DateTime>("Date", Enumerable.Repeat(timestamp, tickers.Count)),
            new StringDataFrameColumn("Ticker", tickers),
            ..values.Select(tuple => new DoubleDataFrameColumn(tuple.FactorType.ToString(), tuple.Values))
        ]);
        var factorTypes = values.Select(tuple => tuple.FactorType).ToArray();

        return new FactorDataFrame(df, factorTypes);
    }
90✔
59

60
    /// <summary>
    /// Value of <paramref name="factorType"/> for <paramref name="ticker"/>.
    /// </summary>
    /// <returns>
    /// The stored value, or <see cref="double.NaN"/> when the factor type is not part of this
    /// frame, the ticker is not found, or the stored cell is null.
    /// </returns>
    public double this[FactorType factorType, string ticker]
    {
        get
        {
            // The factor check comes first; the ticker lookup only runs for known factors.
            if (!FactorTypes.Contains(factorType))
                return double.NaN;

            if (!_tickerIndex.TryGetValue(ticker, out var row))
                return double.NaN;

            var cell = (double?)_dataFrame[factorType.ToString()][row];

            return cell.HasValue ? cell.Value : double.NaN;
        }
    }
73

74
    /// <summary>
    /// All factor values for <paramref name="ticker"/>, keyed by factor type.
    /// Null cells are reported as <see cref="double.NaN"/>.
    /// </summary>
    /// <exception cref="KeyNotFoundException">The ticker is not present in this frame.</exception>
    public IReadOnlyDictionary<FactorType, double> this[string ticker]
    {
        get
        {
            var found = _tickerIndex.TryGetValue(ticker, out var row);
            if (!found)
                throw new KeyNotFoundException($"{ticker} does not exist");

            var valuesByFactor = new Dictionary<FactorType, double>();

            for (var i = 0; i < FactorTypes.Count; i++)
            {
                var factor = FactorTypes[i];
                var column = (DoubleDataFrameColumn)_dataFrame[factor.ToString()];
                double? cell = column[row];

                // Assignment through the indexer: a repeated factor type overwrites, it does not throw.
                valuesByFactor[factor] = cell.GetValueOrDefault(double.NaN);
            }

            return valuesByFactor;
        }
    }
95

96
    /// <summary>
    /// Merges two frames that cover the same tickers but carry different factor columns.
    /// The result keeps the "Date" and "Ticker" columns (and row order) of <paramref name="one"/>
    /// and appends the factor columns of <paramref name="two"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">Either operand is null.</exception>
    /// <exception cref="ArgumentException">
    /// The ticker sets differ (ignoring case), or the frames share a column other than "Ticker"/"Date".
    /// </exception>
    public static FactorDataFrame operator +(FactorDataFrame one, FactorDataFrame two)
    {
        ArgumentNullException.ThrowIfNull(one);
        ArgumentNullException.ThrowIfNull(two);

        // Tickers materializes a new array on each access, so read each side once.
        var oneTickers = one.Tickers;
        var twoTickers = two.Tickers;

        if (oneTickers.Count != twoTickers.Count ||
            oneTickers.Except(twoTickers, StringComparer.OrdinalIgnoreCase).Any())
            throw new ArgumentException("Ticker sets must match.");

        var sharedCols = one._dataFrame.Columns
            .Select(c => c.Name)
            .Intersect(two._dataFrame.Columns.Select(c => c.Name))
            .Where(name => !string.Equals(name, "Ticker", StringComparison.Ordinal) &&
                           !string.Equals(name, "Date", StringComparison.Ordinal))
            .ToArray();
        if (sharedCols.Length != 0)
            throw new ArgumentException(
                $"Cannot merge DataFrames with overlapping Factor columns: {string.Join(", ", sharedCols)}");

        // Columns are joined by position, and the check above only proves the ticker *sets* match.
        // When the second frame lists the same tickers in a different order, reorder its rows to
        // follow the first frame; otherwise its values would end up attached to the wrong tickers.
        var twoFrame = two._dataFrame;
        if (!oneTickers.SequenceEqual(twoTickers, StringComparer.OrdinalIgnoreCase))
        {
            var rowOrder = oneTickers.Select(ticker => two._tickerIndex[ticker]).ToArray();
            twoFrame = twoFrame[rowOrder];
        }

        var joinedColumns = one._dataFrame.Columns
            .UnionBy(twoFrame.Columns, c => c.Name)
            .ToArray();
        var df = new DataFrame(joinedColumns);

        // Union already removes duplicates.
        var joinedFactorTypes = one.FactorTypes.Union(two.FactorTypes).ToArray();

        return new FactorDataFrame(df, joinedFactorTypes);
    }
26✔
123

124
    /// <summary>
    /// Returns a frame whose factor columns have been normalized with <paramref name="method"/>.
    /// </summary>
    /// <param name="method">Normalization to apply; <c>None</c> returns this instance unchanged.</param>
    /// <param name="quantiles">Bin count; must be positive when <paramref name="method"/> is <c>CrossSectionalBins</c>.</param>
    /// <param name="preserveFactors">Factor types whose columns are carried over as-is.</param>
    /// <exception cref="ArgumentException">CrossSectionalBins was requested without a positive <paramref name="quantiles"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="method"/> is not a known value and a non-preserved factor column is encountered.
    /// </exception>
    public FactorDataFrame Normalize(NormalizationMethod method = NormalizationMethod.None, int? quantiles = null, params FactorType[] preserveFactors)
    {
        if (method is NormalizationMethod.None)
            return this;

        var binsRequested = method is NormalizationMethod.CrossSectionalBins;
        if (binsRequested && quantiles is not > 0)
        {
            throw new ArgumentException($"{nameof(quantiles)}: quantiles must be a positive integer when using CrossSectionalBins normalization.");
        }

        // Date and Ticker are carried over untouched, ahead of the factor columns.
        List<DataFrameColumn> outputColumns = [_dataFrame["Date"], _dataFrame["Ticker"]];

        foreach (var factorType in FactorTypes)
        {
            var name = factorType.ToString();
            var source = (DoubleDataFrameColumn)_dataFrame[name];

            if (preserveFactors.Contains(factorType))
            {
                outputColumns.Add(source);
            }
            else
            {
                var rescaled = method switch
                {
                    NormalizationMethod.Rank => source.NormalizeRank(),
                    NormalizationMethod.MinMax => source.NormalizeMinMax(),
                    NormalizationMethod.CrossSectionalZScore => source.NormalizeZScore(),
                    NormalizationMethod.CrossSectionalBins => source.NormalizeBins(quantiles!.Value),
                    _ => throw new ArgumentOutOfRangeException(
                        nameof(method),
                        method,
                        $"Unknown normalization method: {method}")
                };

                outputColumns.Add(new DoubleDataFrameColumn(name, rescaled));
            }
        }

        return new FactorDataFrame(new DataFrame(outputColumns), FactorTypes.ToArray());
    }
21✔
170

171
    /// <summary>
    /// Combines the factor columns into a single "Weight" per ticker using
    /// <paramref name="weights"/>, and returns a frame with "Date", "Ticker" and "Weight" columns.
    /// </summary>
    /// <param name="weights">Weight per factor type; factor columns without an entry get weight 0.</param>
    /// <param name="maxWeightAbs">
    /// Optional symmetric clamp applied to the final weights. Only its magnitude is used.
    /// </param>
    /// <param name="volatilityScaling">
    /// When true and a double "Volatility" column exists, each weight is divided by that row's
    /// volatility (non-positive or missing volatilities are treated as 1).
    /// </param>
    /// <param name="normalizePerAsset">
    /// When true and any factor value is null or NaN, each row is normalized only by the absolute
    /// weights of the factors that row actually has. Otherwise every row is normalized by the sum
    /// of all absolute weights, with null values treated as 0.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="weights"/> is null.</exception>
    public DataFrame ApplyWeights(
        IReadOnlyDictionary<FactorType, double> weights,
        double? maxWeightAbs = null,
        bool volatilityScaling = true,
        bool normalizePerAsset = true)
    {
        ArgumentNullException.ThrowIfNull(weights);

        // Pick factor columns by name rather than by position, so the result does not depend on
        // "Date" and "Ticker" being the first two columns of the wrapped frame.
        var factorCols = _dataFrame.Columns
            .Where(c => !string.Equals(c.Name, "Date", StringComparison.Ordinal) &&
                        !string.Equals(c.Name, "Ticker", StringComparison.Ordinal) &&
                        !string.Equals(c.Name, nameof(Volatility), StringComparison.Ordinal))
            .OfType<DoubleDataFrameColumn>()
            .OrderBy(c => c.Name, StringComparer.Ordinal)
            .ToArray();
        var alignedWeights = factorCols
            .Select(c => Enum.TryParse<FactorType>(c.Name, out var ft) && weights.TryGetValue(ft, out var w) ? w : 0d)
            .ToArray();

        var rows = (int)_dataFrame.Rows.Count;
        var columns = factorCols.Length;

        var tickerWeightsVector = GetWeights();

        if (volatilityScaling &&
            _dataFrame.Columns.FirstOrDefault(c => c.Name == nameof(Volatility)) is DoubleDataFrameColumn volCol)
        {
            var vol = Vector<double>.Build.DenseOfArray(
                volCol.Select(x => x is > 0d ? x.Value : 1d).ToArray());
            tickerWeightsVector = tickerWeightsVector.PointwiseDivide(vol);
        }

        if (maxWeightAbs.HasValue)
        {
            // Math.Clamp throws when min > max, which a negative bound would cause;
            // the parameter is an absolute limit, so use its magnitude.
            var bound = Math.Abs(maxWeightAbs.Value);
            tickerWeightsVector.MapInplace(x => Math.Clamp(x, -bound, bound));
        }

        var resultDf = new DataFrame();
        resultDf.Columns.Add(_dataFrame["Date"]);
        resultDf.Columns.Add(_dataFrame["Ticker"]);
        resultDf.Columns.Add(new DoubleDataFrameColumn("Weight", tickerWeightsVector));

        return resultDf;

        // Weighted combination of the factor columns, one value per row.
        Vector<double> GetWeights()
        {
            if (normalizePerAsset && HasMissingValues())
            {
                // Calculate weight per asset, normalizing only by available factors
                var tickerWeights = new double[rows];
                for (var row = 0; row < rows; row++)
                {
                    double weightSum = 0;
                    double normalizerSum = 0;

                    for (var col = 0; col < columns; col++)
                    {
                        var value = factorCols[col][row];
                        if (value.HasValue && !double.IsNaN(value.Value))
                        {
                            weightSum += value.Value * alignedWeights[col];
                            normalizerSum += Math.Abs(alignedWeights[col]);
                        }
                    }

                    tickerWeights[row] = normalizerSum > 0 ? weightSum / normalizerSum : 0;
                }

                return Vector<double>.Build.DenseOfArray(tickerWeights);
            }
            else
            {
                var data = factorCols
                    .Select(c => c.Select(x => x ?? 0d).ToArray())
                    .ToArray();

                var m = Matrix<double>.Build.DenseOfColumns(rows, columns, data);
                var v = Vector<double>.Build.DenseOfArray(alignedWeights);

                var normalizer = alignedWeights.Sum(Math.Abs);
                if (normalizer <= 0)
                    normalizer = 1;

                return m * v / normalizer; // (rows x 1)
            }
        }

        // True when any factor cell is null or NaN.
        bool HasMissingValues()
        {
            return factorCols.Any(c => c.Any(value => value is null or double.NaN));
        }
    }
18✔
265

266
    /// <summary>
    /// Renders the frame as a right-aligned text table: a header row, a dashed separator,
    /// one line per row prefixed with its zero-based index, and a "[rows x columns]" footer.
    /// </summary>
    /// <returns>The table text, or "Empty FactorDataFrame" when the frame has no rows.</returns>
    public override string ToString()
    {
        if (_dataFrame.Rows.Count == 0)
            return "Empty FactorDataFrame";

        var sb = new System.Text.StringBuilder();
        var columns = _dataFrame.Columns.ToList();
        var rowCount = (int)_dataFrame.Rows.Count;

        // Format every cell exactly once, tracking each column's width along the way,
        // so the rendering pass below can reuse the text instead of formatting it again.
        var cells = new string[columns.Count][];
        var columnWidths = new int[columns.Count];
        for (var i = 0; i < columns.Count; i++)
        {
            var col = columns[i];
            var formatted = new string[rowCount];
            var width = col.Name.Length;

            for (var row = 0; row < rowCount; row++)
            {
                formatted[row] = FormatValue(col[row]);
                width = Math.Max(width, formatted[row].Length);
            }

            cells[i] = formatted;
            columnWidths[i] = width;
        }

        // Row index gutter: digits of the row count plus two spaces of padding.
        var indexWidth = rowCount.ToString().Length + 2;

        // Header row
        sb.Append(new string(' ', indexWidth));
        for (var i = 0; i < columns.Count; i++)
        {
            sb.Append(columns[i].Name.PadLeft(columnWidths[i] + 2));
        }

        sb.AppendLine();

        // Separator line
        sb.Append(new string(' ', indexWidth));
        for (var i = 0; i < columns.Count; i++)
        {
            sb.Append(new string('-', columnWidths[i] + 2));
        }

        sb.AppendLine();

        // Data rows
        for (var row = 0; row < rowCount; row++)
        {
            sb.Append(row.ToString().PadLeft(indexWidth));

            for (var col = 0; col < columns.Count; col++)
            {
                sb.Append(cells[col][row].PadLeft(columnWidths[col] + 2));
            }

            sb.AppendLine();
        }

        sb.AppendLine();
        sb.AppendLine($"[{rowCount} rows x {columns.Count} columns]");

        return sb.ToString();
    }
30✔
331

332
    /// <summary>
    /// Converts a cell value to its display text, independent of the current culture.
    /// </summary>
    /// <param name="value">Cell value; may be null.</param>
    /// <returns>
    /// "NaN" for null or NaN; doubles with six decimals; dates as yyyy-MM-dd, with a 24-hour
    /// HH:mm:ss time part when the <see cref="DateTime"/> has a non-midnight time; strings as-is;
    /// anything else via <see cref="object.ToString"/>.
    /// </returns>
    private static string FormatValue(object? value)
    {
        // Fixed culture so the decimal separator and calendar do not vary by machine.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        return value switch
        {
            null => "NaN",
            double.NaN => "NaN",
            double d => d.ToString("F6", invariant),
            DateOnly date => date.ToString("yyyy-MM-dd", invariant),
            DateTime { Hour: 0, Minute: 0, Second: 0, Millisecond: 0, Microsecond: 0 } dt => dt.ToString("yyyy-MM-dd", invariant),
            // "HH" is the 24-hour clock; "hh" would print 15:04 as 03:04 with no AM/PM marker.
            DateTime dt => dt.ToString("yyyy-MM-dd HH:mm:ss", invariant),
            string s => s,
            _ => value.ToString() ?? "null"
        };
    }
988✔
346
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc