• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moconnell / yolo / 24042289781

06 Apr 2026 05:24PM UTC coverage: 77.253% (-3.7%) from 80.944%
24042289781

Pull #122

github

web-flow
Merge 23ac25c6e into e42527138
Pull Request #122: Bump Microsoft.ApplicationInsights.WorkerService from 2.23.0 to 3.1.0

310 of 385 branches covered (80.52%)

Branch coverage included in aggregate %.

2390 of 3110 relevant lines covered (76.85%)

16.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.93
/src/YoloAbstractions/FactorDataFrame.cs
1
using MathNet.Numerics.LinearAlgebra;
2
using Microsoft.Data.Analysis;
3
using YoloAbstractions.Extensions;
4
using static YoloAbstractions.FactorType;
5

6
namespace YoloAbstractions;
7

8
public sealed record FactorDataFrame
9
{
10
    /// <summary>Name of the column that holds the observation timestamp.</summary>
    private const string Date = nameof(Date);

    /// <summary>Name of the column that holds the ticker symbols.</summary>
    private const string Ticker = nameof(Ticker);

    /// <summary>Name given to the weight column produced by <see cref="ApplyWeights"/>.</summary>
    private const string Weight = nameof(Weight);

    /// <summary>The wrapped table; every member of this type reads from it.</summary>
    private readonly DataFrame _dataFrame;

    /// <summary>Case-insensitive lookup from ticker symbol to its row position in <see cref="_dataFrame"/>.</summary>
    private readonly Dictionary<string, int> _tickerIndex;

    /// <summary>
    /// Wraps <paramref name="dataFrame"/> and indexes its ticker column by row position.
    /// </summary>
    /// <param name="dataFrame">Table to wrap. It must contain a string column named "Ticker".</param>
    /// <param name="factorTypes">Factor types exposed through <see cref="FactorTypes"/>; null is treated as empty.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dataFrame"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// Two tickers are equal under ordinal case-insensitive comparison (raised by the dictionary constructor).
    /// </exception>
    public FactorDataFrame(DataFrame dataFrame, params FactorType[] factorTypes)
    {
        ArgumentNullException.ThrowIfNull(dataFrame);

        _dataFrame = dataFrame;
        FactorTypes = factorTypes ?? [];

        var tickerColumn = (StringDataFrameColumn)dataFrame[Ticker];

        // The indexed Select overload supplies the row position itself, so the projection has no
        // side effects and yields the same pairs no matter how often it is enumerated.
        var rowByTicker = tickerColumn.Select((ticker, row) => KeyValuePair.Create(ticker, row));
        _tickerIndex = new Dictionary<string, int>(rowByTicker, StringComparer.OrdinalIgnoreCase);
    }
107✔
25

26
    /// <summary>Factor types this frame was constructed with.</summary>
    public IReadOnlyList<FactorType> FactorTypes { get; init; }

    /// <summary>Ticker symbols in row order. A new list is materialized on every access.</summary>
    public IReadOnlyList<string> Tickers => [.. (StringDataFrameColumn)_dataFrame[Ticker]];

    /// <summary>
    /// True when the frame has no rows or no factor types.
    /// </summary>
    /// <remarks>
    /// The ticker column has exactly one entry per row, so the row count already tells whether there
    /// are any tickers; copying the column into a list just to count it is unnecessary.
    /// </remarks>
    public bool IsEmpty => _dataFrame.Rows.Count == 0 || FactorTypes.Count == 0;

    /// <summary>Shared frame built from no tickers and no factors.</summary>
    public static readonly FactorDataFrame Empty = NewFrom([], DateTime.MinValue);
3✔
33

34
    /// <summary>
    /// Builds a frame with one row per ticker, each stamped with <paramref name="timestamp"/>, and one
    /// double column per supplied factor (the column is named after the factor type).
    /// </summary>
    /// <param name="tickers">Ticker symbols in row order; must be unique ignoring case.</param>
    /// <param name="timestamp">Value written to the Date column of every row.</param>
    /// <param name="values">One entry per factor; each value list must have one element per ticker.</param>
    /// <returns>A new frame exposing the supplied factor types in the order given.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tickers"/> or <paramref name="values"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// Tickers or factor types are duplicated, or a value list's length differs from the ticker count.
    /// </exception>
    public static FactorDataFrame NewFrom(
        IReadOnlyList<string> tickers,
        DateTime timestamp,
        params (FactorType FactorType, IReadOnlyList<double> Values)[] values)
    {
        ArgumentNullException.ThrowIfNull(tickers);
        ArgumentNullException.ThrowIfNull(values);

        // Use the comparer the constructor's ticker index uses, so "duplicate" means the same thing
        // in both places and no upper-cased copy of every ticker has to be allocated.
        if (tickers.Distinct(StringComparer.OrdinalIgnoreCase).Count() != tickers.Count)
            throw new ArgumentException("Duplicate tickers.", nameof(tickers));

        if (values.Select(v => v.FactorType).Distinct().Count() != values.Length)
            throw new ArgumentException("Duplicate factor types.", nameof(values));

        foreach (var (factorType, factorValues) in values)
        {
            if (factorValues.Count != tickers.Count)
                throw new ArgumentException(
                    $"Length mismatch for {factorType}: expected {tickers.Count}, got {factorValues.Count}.",
                    nameof(values));
        }

        var df = new DataFrame(
        [
            new PrimitiveDataFrameColumn<DateTime>(Date, Enumerable.Repeat(timestamp, tickers.Count)),
            new StringDataFrameColumn(Ticker, tickers),
            ..values.Select(tuple => new DoubleDataFrameColumn(tuple.FactorType.ToString(), tuple.Values))
        ]);
        var factorTypes = values.Select(tuple => tuple.FactorType).ToArray();

        return new FactorDataFrame(df, factorTypes);
    }
69✔
62

63
    /// <summary>
    /// Reads a single factor value for a single ticker.
    /// </summary>
    /// <param name="factorType">Factor whose column is read.</param>
    /// <param name="ticker">Ticker symbol, matched case-insensitively.</param>
    /// <returns>
    /// The stored value, or <see cref="double.NaN"/> when the factor is not part of this frame, the
    /// ticker is unknown, or the stored cell is null.
    /// </returns>
    public double this[FactorType factorType, string ticker]
    {
        get
        {
            if (!FactorTypes.Contains(factorType))
                return double.NaN;

            if (!_tickerIndex.TryGetValue(ticker, out var rowIndex))
                return double.NaN;

            var column = _dataFrame[factorType.ToString()];
            var cell = (double?)column[rowIndex];

            return cell.HasValue ? cell.Value : double.NaN;
        }
    }
76

77
    /// <summary>
    /// Returns every factor value stored for one ticker.
    /// </summary>
    /// <param name="ticker">Ticker symbol, matched case-insensitively.</param>
    /// <returns>A map from factor type to value; null cells are reported as <see cref="double.NaN"/>.</returns>
    /// <exception cref="KeyNotFoundException">The ticker is not present in this frame.</exception>
    public IReadOnlyDictionary<FactorType, double> this[string ticker]
    {
        get
        {
            var found = _tickerIndex.TryGetValue(ticker, out var row);
            if (!found)
            {
                throw new KeyNotFoundException($"{ticker} does not exist");
            }

            var valuesByFactor = new Dictionary<FactorType, double>();

            foreach (var factor in FactorTypes)
            {
                var column = (DoubleDataFrameColumn)_dataFrame[factor.ToString()];
                var cell = column[row];

                // Indexer assignment (not Add) so a repeated factor type overwrites rather than throws.
                valuesByFactor[factor] = cell ?? double.NaN;
            }

            return valuesByFactor;
        }
    }
98

99
    /// <summary>
    /// Merges two frames that describe the same tickers but disjoint factor columns.
    /// </summary>
    /// <remarks>
    /// The result keeps the row order of <paramref name="one"/>. Columns contributed by
    /// <paramref name="two"/> are re-ordered when its tickers appear in a different order, so every
    /// row of the result describes a single ticker. When the order already matches, the original
    /// column instances are reused unchanged.
    /// </remarks>
    /// <param name="one">Left operand; supplies the row order and its own columns.</param>
    /// <param name="two">Right operand; supplies the columns whose names <paramref name="one"/> lacks.</param>
    /// <returns>A frame with the columns of both operands and the union of their factor types.</returns>
    /// <exception cref="ArgumentNullException">Either operand is null.</exception>
    /// <exception cref="ArgumentException">
    /// The ticker sets differ, or both frames contain a column of the same name other than Date/Ticker.
    /// </exception>
    public static FactorDataFrame operator +(FactorDataFrame one, FactorDataFrame two)
    {
        ArgumentNullException.ThrowIfNull(one);
        ArgumentNullException.ThrowIfNull(two);

        // Tickers materializes a new list per access; read each side once.
        var leftTickers = one.Tickers;
        var rightTickers = two.Tickers;

        if (leftTickers.Count != rightTickers.Count ||
            leftTickers.Except(rightTickers, StringComparer.OrdinalIgnoreCase).Any())
            throw new ArgumentException("Ticker sets must match.");

        var sharedCols = one._dataFrame.Columns
            .Select(c => c.Name)
            .Intersect(two._dataFrame.Columns.Select(c => c.Name))
            .Where(name => !string.Equals(name, Ticker, StringComparison.Ordinal) &&
                           !string.Equals(name, Date, StringComparison.Ordinal))
            .ToArray();
        if (sharedCols.Length != 0)
            throw new ArgumentException(
                $"Cannot merge DataFrames with overlapping Factor columns: {string.Join(", ", sharedCols)}");

        // Equal ticker *sets* do not imply equal row *order*. rowMap[i] is the row of `two` that
        // holds the ticker found in row i of `one`.
        var rowMap = new long[leftTickers.Count];
        var sameOrder = true;
        for (var row = 0; row < rowMap.Length; row++)
        {
            if (!two._tickerIndex.TryGetValue(leftTickers[row], out var sourceRow))
                throw new ArgumentException("Ticker sets must match.");

            rowMap[row] = sourceRow;
            sameOrder &= sourceRow == row;
        }

        // Only columns whose name `one` does not already have are taken from `two`.
        var leftNames = one._dataFrame.Columns.Select(c => c.Name).ToHashSet(StringComparer.Ordinal);
        var rightOnlyColumns = two._dataFrame.Columns.Where(c => !leftNames.Contains(c.Name));

        if (!sameOrder)
        {
            // Clone(mapIndices) produces a column whose i-th element is source[mapIndices[i]].
            var mapColumn = new PrimitiveDataFrameColumn<long>(nameof(rowMap), rowMap);
            rightOnlyColumns = rightOnlyColumns.Select(c => c.Clone(mapColumn));
        }

        var joinedColumns = one._dataFrame.Columns.Concat(rightOnlyColumns).ToArray();
        var df = new DataFrame(joinedColumns);

        // Union already removes duplicates.
        var joinedFactorTypes = one.FactorTypes.Union(two.FactorTypes).ToArray();

        return new FactorDataFrame(df, joinedFactorTypes);
    }
9✔
126

127
    /// <summary>
    /// Produces a frame whose factor columns have been rescaled with the chosen method.
    /// </summary>
    /// <param name="method">Normalization to apply; <c>None</c> returns this instance unchanged.</param>
    /// <param name="quantiles">Bin count, required and positive for <c>CrossSectionalBins</c>.</param>
    /// <param name="preserveFactors">Factor types whose columns are carried over without rescaling.</param>
    /// <returns>This instance for <c>None</c>; otherwise a new frame with the same Date/Ticker columns and factor types.</returns>
    /// <exception cref="ArgumentException"><c>CrossSectionalBins</c> was requested without a positive <paramref name="quantiles"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="method"/> is not a known value and at least one factor column had to be rescaled.
    /// </exception>
    public FactorDataFrame Normalize(NormalizationMethod method = NormalizationMethod.None, int? quantiles = null, params FactorType[] preserveFactors)
    {
        if (method is NormalizationMethod.None)
        {
            return this;
        }

        var binsRequested = method is NormalizationMethod.CrossSectionalBins;
        if (binsRequested && quantiles is not > 0)
        {
            throw new ArgumentException($"{nameof(quantiles)}: quantiles must be a positive integer when using CrossSectionalBins normalization.");
        }

        // Key columns are carried over as-is; factor columns follow in FactorTypes order.
        List<DataFrameColumn> outputColumns = [_dataFrame[Date], _dataFrame[Ticker]];

        foreach (var factor in FactorTypes)
        {
            var name = factor.ToString();
            var source = (DoubleDataFrameColumn)_dataFrame[name];

            if (preserveFactors.Contains(factor))
            {
                outputColumns.Add(source);
            }
            else
            {
                var rescaled = method switch
                {
                    NormalizationMethod.CrossSectionalBins => source.NormalizeBins(quantiles!.Value),
                    NormalizationMethod.CrossSectionalZScore => source.NormalizeZScore(),
                    NormalizationMethod.MinMax => source.NormalizeMinMax(),
                    NormalizationMethod.Rank => source.NormalizeRank(),
                    _ => throw new ArgumentOutOfRangeException(
                        nameof(method),
                        method,
                        $"Unknown normalization method: {method}")
                };

                outputColumns.Add(new DoubleDataFrameColumn(name, rescaled));
            }
        }

        return new FactorDataFrame(new DataFrame(outputColumns), [.. FactorTypes]);
    }
16✔
173

174
    /// <summary>
    /// Collapses the factor columns into one weight per ticker using the supplied factor weights.
    /// </summary>
    /// <remarks>
    /// Steps, in order: combine factor values into a raw per-ticker score; pass it through the
    /// <c>Normalize</c> column extension; optionally divide by the Volatility column; optionally clamp;
    /// finally call <c>NormalizeGrossAbs(1.0)</c>. Factor columns are the double columns other than
    /// Date, Ticker and Volatility, identified by name rather than by position.
    /// </remarks>
    /// <param name="weights">Weight per factor type; factors without an entry contribute with weight 0.</param>
    /// <param name="maxWeightAbs">When set, each weight is clamped to [-value, value]. Must not be negative.</param>
    /// <param name="volatilityScaling">
    /// When true and a double Volatility column exists, weights are divided by it; null or
    /// non-positive volatilities are treated as 1.
    /// </param>
    /// <param name="normalizePerAsset">
    /// When true and any factor cell is null or NaN, each ticker's score is divided by the absolute
    /// weights of only the factors it actually has. Otherwise one shared normalizer is used and null
    /// cells count as 0.
    /// </param>
    /// <param name="normalizationMethod">Method forwarded to the column <c>Normalize</c> extension.</param>
    /// <param name="quantilesForNormalization">Quantile count forwarded to the column <c>Normalize</c> extension.</param>
    /// <returns>A new table with the Date, Ticker and Weight columns.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="weights"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxWeightAbs"/> is negative.</exception>
    public DataFrame ApplyWeights(
        IReadOnlyDictionary<FactorType, double> weights,
        double? maxWeightAbs = null,
        bool volatilityScaling = true,
        bool normalizePerAsset = true,
        NormalizationMethod normalizationMethod = NormalizationMethod.None,
        int? quantilesForNormalization = null)
    {
        ArgumentNullException.ThrowIfNull(weights);

        // A negative cap would reach Math.Clamp with min > max and fail there with an unrelated
        // message; reject it here with the offending parameter named.
        if (maxWeightAbs is < 0)
            throw new ArgumentOutOfRangeException(
                nameof(maxWeightAbs),
                maxWeightAbs,
                "The absolute weight cap must not be negative.");

        // Exclude the key columns by name: the public constructor accepts any column order, so
        // skipping "the first two" could silently drop factor columns.
        var factorCols = _dataFrame.Columns
            .Where(c => !string.Equals(c.Name, Date, StringComparison.Ordinal) &&
                        !string.Equals(c.Name, Ticker, StringComparison.Ordinal) &&
                        !string.Equals(c.Name, nameof(Volatility), StringComparison.Ordinal))
            .OfType<DoubleDataFrameColumn>()
            .OrderBy(c => c.Name, StringComparer.Ordinal)
            .ToArray();

        // One weight per factor column, in the same order; unknown columns get weight 0.
        var alignedWeights = factorCols
            .Select(c => Enum.TryParse<FactorType>(c.Name, out var ft) && weights.TryGetValue(ft, out var w) ? w : 0d)
            .ToArray();

        var rows = (int)_dataFrame.Rows.Count;
        var columns = factorCols.Length;

        var rawWeights = GetWeights();

        var weightsCol = new DoubleDataFrameColumn(Weight, rawWeights)
                .Normalize(normalizationMethod, quantilesForNormalization);

        if (volatilityScaling &&
            _dataFrame.Columns.FirstOrDefault(c => c.Name == nameof(Volatility)) is DoubleDataFrameColumn volCol)
        {
            // Null or non-positive volatility becomes 1 so the division leaves that weight unchanged.
            var vol = Vector<double>.Build.DenseOfArray([.. volCol.Select(x => x is > 0d ? x.Value : 1d)]);
            weightsCol = weightsCol.PointwiseDivide(vol);
        }

        if (maxWeightAbs.HasValue)
        {
            var cap = maxWeightAbs.Value;
            var clampedWeights = weightsCol.Select(x => Math.Clamp(x.GetValueOrDefault(), -cap, cap));
            weightsCol = new DoubleDataFrameColumn(Weight, clampedWeights);
        }

        weightsCol = weightsCol.NormalizeGrossAbs(1.0);

        var resultDf = new DataFrame();
        resultDf.Columns.Add(_dataFrame[Date]);
        resultDf.Columns.Add(_dataFrame[Ticker]);
        resultDf.Columns.Add(weightsCol);

        return resultDf;

        // Raw per-ticker score: weighted factor sum divided by the sum of absolute weights.
        Vector<double> GetWeights()
        {
            if (normalizePerAsset && HasMissingValues())
            {
                // Normalize each ticker only by the factors it actually has a value for.
                var perAsset = new double[rows];
                for (var row = 0; row < rows; row++)
                {
                    double weightSum = 0;
                    double normalizerSum = 0;

                    for (var col = 0; col < columns; col++)
                    {
                        var value = factorCols[col][row];
                        if (value.HasValue && !double.IsNaN(value.Value))
                        {
                            weightSum += value.Value * alignedWeights[col];
                            normalizerSum += Math.Abs(alignedWeights[col]);
                        }
                    }

                    perAsset[row] = normalizerSum > 0 ? weightSum / normalizerSum : 0;
                }

                return Vector<double>.Build.DenseOfArray(perAsset);
            }

            // Null cells count as 0 and one normalizer is shared by all tickers.
            var data = factorCols
                .Select(c => c.Select(x => x ?? 0d).ToArray())
                .ToArray();

            var m = Matrix<double>.Build.DenseOfColumns(rows, columns, data);
            var v = Vector<double>.Build.DenseOfArray(alignedWeights);

            var normalizer = alignedWeights.Sum(Math.Abs);
            if (normalizer <= 0)
                normalizer = 1;

            return m * v / normalizer; // (rows x 1)
        }

        // True when any factor cell is null or NaN.
        bool HasMissingValues()
        {
            return factorCols.Any(c => c.Any(value => value is null or double.NaN));
        }
    }
13✔
275

276
    /// <summary>
    /// Renders the frame as a right-aligned text table: a header row, a dashed separator, one line
    /// per row prefixed with its zero-based index, and a trailing "[rows x columns]" summary.
    /// </summary>
    /// <returns>The table text, or "Empty FactorDataFrame" when there are no rows.</returns>
    public override string ToString()
    {
        if (_dataFrame.Rows.Count == 0)
            return "Empty FactorDataFrame";

        var columns = _dataFrame.Columns.ToList();
        var columnCount = columns.Count;
        var rowCount = (int)_dataFrame.Rows.Count;

        // Format every cell once, tracking the widest text (header included) per column.
        var cells = new string[rowCount, columnCount];
        var widths = new int[columnCount];
        for (var c = 0; c < columnCount; c++)
        {
            var widest = columns[c].Name.Length;

            for (var r = 0; r < rowCount; r++)
            {
                var text = FormatValue(columns[c][r]);
                cells[r, c] = text;
                if (text.Length > widest)
                    widest = text.Length;
            }

            widths[c] = widest;
        }

        // The index gutter is as wide as the row count's digits plus two spaces of padding.
        var indexWidth = rowCount.ToString().Length + 2;
        var gutter = new string(' ', indexWidth);
        var sb = new System.Text.StringBuilder();

        // Header row
        sb.Append(gutter);
        for (var c = 0; c < columnCount; c++)
            sb.Append(columns[c].Name.PadLeft(widths[c] + 2));
        sb.AppendLine();

        // Separator line
        sb.Append(gutter);
        foreach (var width in widths)
            sb.Append('-', width + 2);
        sb.AppendLine();

        // Data rows
        for (var r = 0; r < rowCount; r++)
        {
            sb.Append(r.ToString().PadLeft(indexWidth));

            for (var c = 0; c < columnCount; c++)
                sb.Append(cells[r, c].PadLeft(widths[c] + 2));

            sb.AppendLine();
        }

        sb.AppendLine();
        sb.AppendLine($"[{rowCount} rows x {columnCount} columns]");

        return sb.ToString();
    }
18✔
341

342
    /// <summary>
    /// Converts one cell to its display text for <see cref="ToString"/>.
    /// </summary>
    /// <remarks>
    /// Numbers and dates are formatted with the invariant culture, so the output does not change with
    /// the machine's regional settings (decimal separator, calendar).
    /// </remarks>
    /// <param name="value">Cell value; may be null.</param>
    /// <returns>
    /// "NaN" for null or NaN; six decimals for doubles; "yyyy-MM-dd" for dates and for date-times at
    /// midnight; "yyyy-MM-dd HH:mm:ss" (24-hour clock) for other date-times; the string itself for
    /// strings; otherwise the value's own <c>ToString()</c>, or "null" when that returns null.
    /// </returns>
    private static string FormatValue(object? value)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        return value switch
        {
            null => "NaN",
            double.NaN => "NaN",
            double d => d.ToString("F6", invariant),
            DateOnly date => date.ToString("yyyy-MM-dd", invariant),
            DateTime { Hour: 0, Minute: 0, Second: 0, Millisecond: 0, Microsecond: 0 } dt => dt.ToString("yyyy-MM-dd", invariant),
            // "HH" is the 24-hour clock; "hh" would print 15:04 as 03:04 with no AM/PM marker.
            DateTime dt => dt.ToString("yyyy-MM-dd HH:mm:ss", invariant),
            string s => s,
            _ => value.ToString() ?? "null"
        };
    }
196✔
356
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc