• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

grpc / grpc-java / #20233

02 Apr 2026 12:18PM UTC coverage: 88.792% (+0.008%) from 88.784%
#20233

push

github

web-flow
[xds] Implement A114: WRR support for custom backend metrics (#12645)

### Description
This PR implements [gRFC A114: WRR Support for Custom Backend
Metrics](https://github.com/grpc/proposal/pull/536).

It updates the `weighted_round_robin` policy to allow users to configure
which backend metrics drive the load balancing weights.

### Key Changes
* **Configuration**: Supports the new
`metric_names_for_computing_utilization` field in
`WeightedRoundRobinLbConfig`.
* **Weight Calculation**: Implements logic to resolve custom metrics
(including map lookups like `named_metrics.foo`) when
`application_utilization` is absent.
* **Refactor**: Centralizes the complex metric lookup and validation
logic (checking for NaN, <= 0, etc.) into a new internal utility
`MetricReportUtils`.
* **Testing**: Verifies correct precedence: `application_utilization` >
`custom_metrics` (max valid value) > `cpu_utilization`.

36007 of 40552 relevant lines covered (88.79%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.55
/../xds/src/main/java/io/grpc/xds/WeightedRoundRobinLoadBalancer.java
1
/*
2
 * Copyright 2023 The gRPC Authors
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
package io.grpc.xds;
18

19
import static com.google.common.base.Preconditions.checkArgument;
20
import static com.google.common.base.Preconditions.checkNotNull;
21

22
import com.google.common.annotations.VisibleForTesting;
23
import com.google.common.base.MoreObjects;
24
import com.google.common.base.Preconditions;
25
import com.google.common.collect.ImmutableList;
26
import com.google.common.collect.Lists;
27
import io.grpc.ConnectivityState;
28
import io.grpc.ConnectivityStateInfo;
29
import io.grpc.Deadline.Ticker;
30
import io.grpc.DoubleHistogramMetricInstrument;
31
import io.grpc.EquivalentAddressGroup;
32
import io.grpc.LoadBalancer;
33
import io.grpc.LoadBalancerProvider;
34
import io.grpc.LongCounterMetricInstrument;
35
import io.grpc.MetricInstrumentRegistry;
36
import io.grpc.NameResolver;
37
import io.grpc.Status;
38
import io.grpc.SynchronizationContext;
39
import io.grpc.SynchronizationContext.ScheduledHandle;
40
import io.grpc.services.MetricReport;
41
import io.grpc.util.ForwardingSubchannel;
42
import io.grpc.util.MultiChildLoadBalancer;
43
import io.grpc.xds.internal.MetricReportUtils;
44
import io.grpc.xds.orca.OrcaOobUtil;
45
import io.grpc.xds.orca.OrcaOobUtil.OrcaOobReportListener;
46
import io.grpc.xds.orca.OrcaPerRequestUtil;
47
import io.grpc.xds.orca.OrcaPerRequestUtil.OrcaPerRequestReportListener;
48
import java.util.ArrayList;
49
import java.util.Collection;
50
import java.util.HashSet;
51
import java.util.List;
52
import java.util.Objects;
53
import java.util.OptionalDouble;
54
import java.util.Random;
55
import java.util.Set;
56
import java.util.concurrent.ScheduledExecutorService;
57
import java.util.concurrent.TimeUnit;
58
import java.util.concurrent.atomic.AtomicInteger;
59
import java.util.logging.Level;
60
import java.util.logging.Logger;
61

62
/**
63
 * A {@link LoadBalancer} that provides weighted-round-robin load-balancing over the
64
 * {@link EquivalentAddressGroup}s from the {@link NameResolver}. The subchannel weights are
65
 * determined by backend metrics using ORCA.
66
 * To use WRR, users may configure through channel serviceConfig. Example config:
67
 * <pre> {@code
68
 *       String wrrConfig = "{\"loadBalancingConfig\":" +
69
 *           "[{\"weighted_round_robin\":{\"enableOobLoadReport\":true, " +
70
 *           "\"blackoutPeriod\":\"10s\"," +
71
 *           "\"oobReportingPeriod\":\"10s\"," +
72
 *           "\"weightExpirationPeriod\":\"180s\"," +
73
 *           "\"errorUtilizationPenalty\":\"1.0\"," +
74
 *           "\"weightUpdatePeriod\":\"1s\"}}]}";
75
 *        serviceConfig = (Map<String, ?>) JsonParser.parse(wrrConfig);
76
 *        channel = ManagedChannelBuilder.forTarget("test:///lb.test.grpc.io")
77
 *            .defaultServiceConfig(serviceConfig)
78
 *            .build();
79
 *  }
80
 *  </pre>
81
 *  Users may also configure through xDS control plane via custom lb policy. But that is much more
82
 *  complex to set up. Example config:
83
 *  <pre>
84
 *  localityLbPolicies:
85
 *   - customPolicy:
86
 *       name: weighted_round_robin
87
 *       data: '{ "enableOobLoadReport": true }'
88
 *  </pre>
89
 *  See related documentation: https://cloud.google.com/service-mesh/legacy/load-balancing-apis/proxyless-configure-advanced-traffic-management#custom-lb-config
90
 */
91
final class WeightedRoundRobinLoadBalancer extends MultiChildLoadBalancer {
92

93
  private static final LongCounterMetricInstrument RR_FALLBACK_COUNTER;
94
  private static final LongCounterMetricInstrument ENDPOINT_WEIGHT_NOT_YET_USEABLE_COUNTER;
95
  private static final LongCounterMetricInstrument ENDPOINT_WEIGHT_STALE_COUNTER;
96
  private static final DoubleHistogramMetricInstrument ENDPOINT_WEIGHTS_HISTOGRAM;
97
  private static final Logger log = Logger.getLogger(
1✔
98
      WeightedRoundRobinLoadBalancer.class.getName());
1✔
99
  private WeightedRoundRobinLoadBalancerConfig config;
100
  private final SynchronizationContext syncContext;
101
  private final ScheduledExecutorService timeService;
102
  private ScheduledHandle weightUpdateTimer;
103
  private final Runnable updateWeightTask;
104
  private final AtomicInteger sequence;
105
  private final long infTime;
106
  private final Ticker ticker;
107
  private String locality = "";
1✔
108
  private String backendService = "";
1✔
109
  private SubchannelPicker currentPicker = new FixedResultPicker(PickResult.withNoResult());
1✔
110

111
  // The metric instruments are only registered once and shared by all instances of this LB.
112
  static {
113
    MetricInstrumentRegistry metricInstrumentRegistry
114
        = MetricInstrumentRegistry.getDefaultRegistry();
1✔
115
    RR_FALLBACK_COUNTER = metricInstrumentRegistry.registerLongCounter(
1✔
116
        "grpc.lb.wrr.rr_fallback",
117
        "EXPERIMENTAL. Number of scheduler updates in which there were not enough endpoints "
118
            + "with valid weight, which caused the WRR policy to fall back to RR behavior",
119
        "{update}",
120
        Lists.newArrayList("grpc.target"),
1✔
121
        Lists.newArrayList("grpc.lb.locality", "grpc.lb.backend_service"),
1✔
122
        false);
123
    ENDPOINT_WEIGHT_NOT_YET_USEABLE_COUNTER = metricInstrumentRegistry.registerLongCounter(
1✔
124
        "grpc.lb.wrr.endpoint_weight_not_yet_usable",
125
        "EXPERIMENTAL. Number of endpoints from each scheduler update that don't yet have usable "
126
            + "weight information",
127
        "{endpoint}",
128
        Lists.newArrayList("grpc.target"),
1✔
129
        Lists.newArrayList("grpc.lb.locality", "grpc.lb.backend_service"),
1✔
130
        false);
131
    ENDPOINT_WEIGHT_STALE_COUNTER = metricInstrumentRegistry.registerLongCounter(
1✔
132
        "grpc.lb.wrr.endpoint_weight_stale",
133
        "EXPERIMENTAL. Number of endpoints from each scheduler update whose latest weight is "
134
            + "older than the expiration period",
135
        "{endpoint}",
136
        Lists.newArrayList("grpc.target"),
1✔
137
        Lists.newArrayList("grpc.lb.locality", "grpc.lb.backend_service"),
1✔
138
        false);
139
    ENDPOINT_WEIGHTS_HISTOGRAM = metricInstrumentRegistry.registerDoubleHistogram(
1✔
140
        "grpc.lb.wrr.endpoint_weights",
141
        "EXPERIMENTAL. The histogram buckets will be endpoint weight ranges.",
142
        "{weight}",
143
        Lists.newArrayList(),
1✔
144
        Lists.newArrayList("grpc.target"),
1✔
145
        Lists.newArrayList("grpc.lb.locality", "grpc.lb.backend_service"),
1✔
146
        false);
147
  }
1✔
148

149
  /**
   * Creates the balancer, seeding the shared pick sequence from a fresh {@link Random}.
   *
   * @param helper the channel-provided helper
   * @param ticker the time source used for weight expiration and blackout bookkeeping
   */
  public WeightedRoundRobinLoadBalancer(Helper helper, Ticker ticker) {
    this(helper, ticker, new Random());
  }
1✔
152

153
  /**
   * Creates the balancer with an explicit {@link Random}, whose next int becomes the initial
   * value of the pick sequence.
   *
   * @param helper the channel-provided helper; it is wrapped with ORCA reporting support
   * @param ticker the time source; must not be null
   * @param random source of the initial sequence value
   */
  @VisibleForTesting
  WeightedRoundRobinLoadBalancer(Helper helper, Ticker ticker, Random random) {
    super(OrcaOobUtil.newOrcaReportingHelper(helper));
    this.ticker = checkNotNull(ticker, "ticker");
    // Sentinel timestamp meaning "no usable report seen yet".
    this.infTime = ticker.nanoTime() + Long.MAX_VALUE;
    this.syncContext = checkNotNull(helper.getSynchronizationContext(), "syncContext");
    this.timeService = checkNotNull(helper.getScheduledExecutorService(), "timeService");
    this.updateWeightTask = new UpdateWeightTask();
    this.sequence = new AtomicInteger(random.nextInt());
    log.fine("weighted_round_robin LB created");
  }
1✔
164

165
  @Override
166
  protected ChildLbState createChildLbState(Object key) {
167
    return new WeightedChildLbState(key, pickFirstLbProvider);
1✔
168
  }
169

170
  @Override
171
  public Status acceptResolvedAddresses(ResolvedAddresses resolvedAddresses) {
172
    if (resolvedAddresses.getLoadBalancingPolicyConfig() == null) {
1✔
173
      Status unavailableStatus = Status.UNAVAILABLE.withDescription(
1✔
174
              "NameResolver returned no WeightedRoundRobinLoadBalancerConfig. addrs="
175
                      + resolvedAddresses.getAddresses()
1✔
176
                      + ", attrs=" + resolvedAddresses.getAttributes());
1✔
177
      handleNameResolutionError(unavailableStatus);
1✔
178
      return unavailableStatus;
1✔
179
    }
180
    String locality = resolvedAddresses.getAttributes().get(WeightedTargetLoadBalancer.CHILD_NAME);
1✔
181
    if (locality != null) {
1✔
182
      this.locality = locality;
1✔
183
    } else {
184
      this.locality = "";
1✔
185
    }
186
    String backendService
1✔
187
        = resolvedAddresses.getAttributes().get(NameResolver.ATTR_BACKEND_SERVICE);
1✔
188
    if (backendService != null) {
1✔
189
      this.backendService = backendService;
1✔
190
    } else {
191
      this.backendService = "";
1✔
192
    }
193
    config =
1✔
194
        (WeightedRoundRobinLoadBalancerConfig) resolvedAddresses.getLoadBalancingPolicyConfig();
1✔
195

196
    if (weightUpdateTimer != null && weightUpdateTimer.isPending()) {
1✔
197
      weightUpdateTimer.cancel();
1✔
198
    }
199
    updateWeightTask.run();
1✔
200

201
    Status status = super.acceptResolvedAddresses(resolvedAddresses);
1✔
202

203
    createAndApplyOrcaListeners();
1✔
204

205
    return status;
1✔
206
  }
207

208
  /**
209
   * Updates picker with the list of active subchannels (state == READY).
210
   */
211
  @Override
212
  protected void updateOverallBalancingState() {
213
    List<ChildLbState> activeList = getReadyChildren();
1✔
214
    if (activeList.isEmpty()) {
1✔
215
      // No READY subchannels
216

217
      // MultiChildLB will request connection immediately on subchannel IDLE.
218
      boolean isConnecting = false;
1✔
219
      for (ChildLbState childLbState : getChildLbStates()) {
1✔
220
        ConnectivityState state = childLbState.getCurrentState();
1✔
221
        if (state == ConnectivityState.CONNECTING || state == ConnectivityState.IDLE) {
1✔
222
          isConnecting = true;
1✔
223
          break;
1✔
224
        }
225
      }
1✔
226

227
      if (isConnecting) {
1✔
228
        updateBalancingState(
1✔
229
            ConnectivityState.CONNECTING, new FixedResultPicker(PickResult.withNoResult()));
1✔
230
      } else {
231
        updateBalancingState(
1✔
232
            ConnectivityState.TRANSIENT_FAILURE, createReadyPicker(getChildLbStates()));
1✔
233
      }
234
    } else {
1✔
235
      updateBalancingState(ConnectivityState.READY, createReadyPicker(activeList));
1✔
236
    }
237
  }
1✔
238

239
  private SubchannelPicker createReadyPicker(Collection<ChildLbState> activeList) {
240
    WeightedRoundRobinPicker picker = new WeightedRoundRobinPicker(ImmutableList.copyOf(activeList),
1✔
241
        config.enableOobLoadReport, config.errorUtilizationPenalty, sequence,
242
        config.metricNamesForComputingUtilization);
243
    updateWeight(picker);
1✔
244
    return picker;
1✔
245
  }
246

247
  private void updateWeight(WeightedRoundRobinPicker picker) {
248
    Helper helper = getHelper();
1✔
249
    float[] newWeights = new float[picker.children.size()];
1✔
250
    AtomicInteger staleEndpoints = new AtomicInteger();
1✔
251
    AtomicInteger notYetUsableEndpoints = new AtomicInteger();
1✔
252
    for (int i = 0; i < picker.children.size(); i++) {
1✔
253
      double newWeight = ((WeightedChildLbState) picker.children.get(i)).getWeight(staleEndpoints,
1✔
254
          notYetUsableEndpoints);
255
      helper.getMetricRecorder()
1✔
256
          .recordDoubleHistogram(ENDPOINT_WEIGHTS_HISTOGRAM, newWeight,
1✔
257
              ImmutableList.of(helper.getChannelTarget()),
1✔
258
              ImmutableList.of(locality, backendService));
1✔
259
      newWeights[i] = newWeight > 0 ? (float) newWeight : 0.0f;
1✔
260
    }
261

262
    if (staleEndpoints.get() > 0) {
1✔
263
      helper.getMetricRecorder()
1✔
264
          .addLongCounter(ENDPOINT_WEIGHT_STALE_COUNTER, staleEndpoints.get(),
1✔
265
              ImmutableList.of(helper.getChannelTarget()),
1✔
266
              ImmutableList.of(locality, backendService));
1✔
267
    }
268
    if (notYetUsableEndpoints.get() > 0) {
1✔
269
      helper.getMetricRecorder()
1✔
270
          .addLongCounter(ENDPOINT_WEIGHT_NOT_YET_USEABLE_COUNTER, notYetUsableEndpoints.get(),
1✔
271
              ImmutableList.of(helper.getChannelTarget()),
1✔
272
              ImmutableList.of(locality, backendService));
1✔
273
    }
274
    boolean weightsEffective = picker.updateWeight(newWeights);
1✔
275
    if (!weightsEffective) {
1✔
276
      helper.getMetricRecorder()
1✔
277
          .addLongCounter(RR_FALLBACK_COUNTER, 1, ImmutableList.of(helper.getChannelTarget()),
1✔
278
              ImmutableList.of(locality, backendService));
1✔
279
    }
280
  }
1✔
281

282
  private void updateBalancingState(ConnectivityState state, SubchannelPicker picker) {
283
    if (state != currentConnectivityState || !picker.equals(currentPicker)) {
1✔
284
      getHelper().updateBalancingState(state, picker);
1✔
285
      currentConnectivityState = state;
1✔
286
      currentPicker = picker;
1✔
287
    }
288
  }
1✔
289

290
  @VisibleForTesting
291
  final class WeightedChildLbState extends ChildLbState {
292

293
    private final Set<WrrSubchannel> subchannels = new HashSet<>();
1✔
294
    private volatile long lastUpdated;
295
    private volatile long nonEmptySince;
296
    private volatile double weight = 0;
1✔
297

298
    private OrcaReportListener orcaReportListener;
299

300
    public WeightedChildLbState(Object key, LoadBalancerProvider policyProvider) {
1✔
301
      super(key, policyProvider);
1✔
302
    }
1✔
303

304
    @Override
305
    protected ChildLbStateHelper createChildHelper() {
306
      return new WrrChildLbStateHelper();
1✔
307
    }
308

309
    private double getWeight(AtomicInteger staleEndpoints, AtomicInteger notYetUsableEndpoints) {
310
      if (config == null) {
1✔
311
        return 0;
×
312
      }
313
      long now = ticker.nanoTime();
1✔
314
      if (now - lastUpdated >= config.weightExpirationPeriodNanos) {
1✔
315
        nonEmptySince = infTime;
1✔
316
        staleEndpoints.incrementAndGet();
1✔
317
        return 0;
1✔
318
      } else if (now - nonEmptySince < config.blackoutPeriodNanos
1✔
319
          && config.blackoutPeriodNanos > 0) {
1✔
320
        notYetUsableEndpoints.incrementAndGet();
1✔
321
        return 0;
1✔
322
      } else {
323
        return weight;
1✔
324
      }
325
    }
326

327
    public void addSubchannel(WrrSubchannel wrrSubchannel) {
328
      subchannels.add(wrrSubchannel);
1✔
329
    }
1✔
330

331
    public OrcaReportListener getOrCreateOrcaListener(float errorUtilizationPenalty,
332
        ImmutableList<String> metricNamesForComputingUtilization) {
333
      if (orcaReportListener != null
1✔
334
          && orcaReportListener.errorUtilizationPenalty == errorUtilizationPenalty
1✔
335
          && orcaReportListener.metricNamesForComputingUtilization
1✔
336
              .equals(metricNamesForComputingUtilization)) {
1✔
337
        return orcaReportListener;
1✔
338
      }
339
      orcaReportListener =
1✔
340
          new OrcaReportListener(errorUtilizationPenalty, metricNamesForComputingUtilization);
341
      return orcaReportListener;
1✔
342
    }
343

344
    public void removeSubchannel(WrrSubchannel wrrSubchannel) {
345
      subchannels.remove(wrrSubchannel);
1✔
346
    }
1✔
347

348
    final class WrrChildLbStateHelper extends ChildLbStateHelper {
1✔
349
      @Override
350
      public Subchannel createSubchannel(CreateSubchannelArgs args) {
351
        return new WrrSubchannel(super.createSubchannel(args), WeightedChildLbState.this);
1✔
352
      }
353

354
      @Override
355
      public void updateBalancingState(ConnectivityState newState, SubchannelPicker newPicker) {
356
        super.updateBalancingState(newState, newPicker);
1✔
357
        if (!resolvingAddresses && newState == ConnectivityState.IDLE) {
1✔
358
          getLb().requestConnection();
×
359
        }
360
      }
1✔
361
    }
362

363
    final class OrcaReportListener implements OrcaPerRequestReportListener, OrcaOobReportListener {
364
      private final float errorUtilizationPenalty;
365
      private final ImmutableList<String> metricNamesForComputingUtilization;
366

367
      OrcaReportListener(float errorUtilizationPenalty,
368
          ImmutableList<String> metricNamesForComputingUtilization) {
1✔
369
        this.errorUtilizationPenalty = errorUtilizationPenalty;
1✔
370
        this.metricNamesForComputingUtilization = metricNamesForComputingUtilization;
1✔
371
      }
1✔
372

373
      @Override
374
      public void onLoadReport(MetricReport report) {
375
        double utilization = getUtilization(report, metricNamesForComputingUtilization);
1✔
376

377
        double newWeight = 0;
1✔
378
        if (utilization > 0 && report.getQps() > 0) {
1✔
379
          double penalty = 0;
1✔
380
          if (report.getEps() > 0 && errorUtilizationPenalty > 0) {
1✔
381
            penalty = report.getEps() / report.getQps() * errorUtilizationPenalty;
1✔
382
          }
383
          newWeight = report.getQps() / (utilization + penalty);
1✔
384
        }
385
        if (newWeight == 0) {
1✔
386
          return;
1✔
387
        }
388
        if (nonEmptySince == infTime) {
1✔
389
          nonEmptySince = ticker.nanoTime();
1✔
390
        }
391
        lastUpdated = ticker.nanoTime();
1✔
392
        weight = newWeight;
1✔
393
      }
1✔
394

395
      /**
396
       * Returns the utilization value computed from the specified metric names. If the custom
397
       * metrics are present and valid, the maximum of the custom metrics is returned. Otherwise,
398
       * if application utilization is > 0, it is returned. If neither are present, the CPU
399
       * utilization is returned.
400
       */
401
      private double getUtilization(MetricReport report, ImmutableList<String> metricNames) {
402
        OptionalDouble customUtil = getCustomMetricUtilization(report, metricNames);
1✔
403
        if (customUtil.isPresent()) {
1✔
404
          return customUtil.getAsDouble();
1✔
405
        }
406
        double appUtil = report.getApplicationUtilization();
1✔
407
        if (appUtil > 0) {
1✔
408
          return appUtil;
1✔
409
        }
410
        return report.getCpuUtilization();
1✔
411
      }
412

413
      /**
414
       * Returns the maximum utilization value among the specified metric names.
415
       * Returns OptionalDouble.empty() if NONE of the specified metrics are present in the report,
416
       * or if all present metrics are NaN.
417
       * Returns OptionalDouble.of(maxUtil) if at least one non-NaN metric is present.
418
       */
419
      private OptionalDouble getCustomMetricUtilization(MetricReport report,
420
          ImmutableList<String> metricNames) {
421
        return metricNames.stream()
1✔
422
            .map(name -> MetricReportUtils.getMetric(report, name))
1✔
423
            .filter(OptionalDouble::isPresent)
1✔
424
            .mapToDouble(OptionalDouble::getAsDouble)
1✔
425
            .filter(d -> !Double.isNaN(d) && d > 0)
1✔
426
            .max();
1✔
427
      }
428
    }
429
  }
430

431
  private final class UpdateWeightTask implements Runnable {
1✔
432
    @Override
433
    public void run() {
434
      if (currentPicker != null && currentPicker instanceof WeightedRoundRobinPicker) {
1✔
435
        updateWeight((WeightedRoundRobinPicker) currentPicker);
1✔
436
      }
437
      weightUpdateTimer = syncContext.schedule(this, config.weightUpdatePeriodNanos,
1✔
438
          TimeUnit.NANOSECONDS, timeService);
1✔
439
    }
1✔
440
  }
441

442
  private void createAndApplyOrcaListeners() {
443
    for (ChildLbState child : getChildLbStates()) {
1✔
444
      WeightedChildLbState wChild = (WeightedChildLbState) child;
1✔
445
      for (WrrSubchannel weightedSubchannel : wChild.subchannels) {
1✔
446
        if (config.enableOobLoadReport) {
1✔
447
          OrcaOobUtil.setListener(weightedSubchannel,
1✔
448
              wChild.getOrCreateOrcaListener(config.errorUtilizationPenalty,
1✔
449
                      config.metricNamesForComputingUtilization),
450
              OrcaOobUtil.OrcaReportingConfig.newBuilder()
1✔
451
                  .setReportInterval(config.oobReportingPeriodNanos, TimeUnit.NANOSECONDS).build());
1✔
452
        } else {
453
          OrcaOobUtil.setListener(weightedSubchannel, null, null);
1✔
454
        }
455
      }
1✔
456
    }
1✔
457
  }
1✔
458

459
  @Override
460
  public void shutdown() {
461
    if (weightUpdateTimer != null) {
1✔
462
      weightUpdateTimer.cancel();
1✔
463
    }
464
    super.shutdown();
1✔
465
  }
1✔
466

467
  @VisibleForTesting
468
  final class WrrSubchannel extends ForwardingSubchannel {
469
    private final Subchannel delegate;
470
    private final WeightedChildLbState owner;
471

472
    WrrSubchannel(Subchannel delegate, WeightedChildLbState owner) {
1✔
473
      this.delegate = checkNotNull(delegate, "delegate");
1✔
474
      this.owner = checkNotNull(owner, "owner");
1✔
475
    }
1✔
476

477
    @Override
478
    public void start(SubchannelStateListener listener) {
479
      owner.addSubchannel(this);
1✔
480
      delegate().start(new SubchannelStateListener() {
1✔
481
        @Override
482
        public void onSubchannelState(ConnectivityStateInfo newState) {
483
          if (newState.getState().equals(ConnectivityState.READY)) {
1✔
484
            owner.nonEmptySince = infTime;
1✔
485
          }
486
          listener.onSubchannelState(newState);
1✔
487
        }
1✔
488
      });
489
    }
1✔
490

491
    @Override
492
    protected Subchannel delegate() {
493
      return delegate;
1✔
494
    }
495

496
    @Override
497
    public void shutdown() {
498
      super.shutdown();
1✔
499
      owner.removeSubchannel(this);
1✔
500
    }
1✔
501
  }
502

503
  @VisibleForTesting
504
  static final class WeightedRoundRobinPicker extends SubchannelPicker {
505
    // Parallel lists (column-based storage instead of normal row-based storage of List<Struct>).
506
    // The ith element of children corresponds to the ith element of pickers, listeners, and even
507
    // updateWeight(float[]).
508
    private final List<ChildLbState> children; // May only be accessed from sync context
509
    private final List<SubchannelPicker> pickers;
510
    private final List<OrcaPerRequestReportListener> reportListeners;
511
    private final boolean enableOobLoadReport;
512
    private final float errorUtilizationPenalty;
513
    private final AtomicInteger sequence;
514
    private final int hashCode;
515
    private volatile StaticStrideScheduler scheduler;
516

517
    WeightedRoundRobinPicker(List<ChildLbState> children, boolean enableOobLoadReport,
518
        float errorUtilizationPenalty, AtomicInteger sequence,
519
        ImmutableList<String> metricNamesForComputingUtilization) {
1✔
520
      checkNotNull(children, "children");
1✔
521
      Preconditions.checkArgument(!children.isEmpty(), "empty child list");
1✔
522
      this.children = children;
1✔
523
      List<SubchannelPicker> pickers = new ArrayList<>(children.size());
1✔
524
      List<OrcaPerRequestReportListener> reportListeners = new ArrayList<>(children.size());
1✔
525
      for (ChildLbState child : children) {
1✔
526
        WeightedChildLbState wChild = (WeightedChildLbState) child;
1✔
527
        pickers.add(wChild.getCurrentPicker());
1✔
528
        reportListeners.add(wChild.getOrCreateOrcaListener(errorUtilizationPenalty,
1✔
529
            metricNamesForComputingUtilization));
530
      }
1✔
531
      this.pickers = pickers;
1✔
532
      this.reportListeners = reportListeners;
1✔
533
      this.enableOobLoadReport = enableOobLoadReport;
1✔
534
      this.errorUtilizationPenalty = errorUtilizationPenalty;
1✔
535
      this.sequence = checkNotNull(sequence, "sequence");
1✔
536

537
      // For equality we treat pickers as a set; use hash code as defined by Set
538
      int sum = 0;
1✔
539
      for (SubchannelPicker picker : pickers) {
1✔
540
        sum += picker.hashCode();
1✔
541
      }
1✔
542
      this.hashCode = sum
1✔
543
          ^ Boolean.hashCode(enableOobLoadReport)
1✔
544
          ^ Float.hashCode(errorUtilizationPenalty);
1✔
545
    }
1✔
546

547
    @Override
548
    public PickResult pickSubchannel(PickSubchannelArgs args) {
549
      int pick = scheduler.pick();
1✔
550
      PickResult pickResult = pickers.get(pick).pickSubchannel(args);
1✔
551
      Subchannel subchannel = pickResult.getSubchannel();
1✔
552
      if (subchannel == null) {
1✔
553
        return pickResult;
1✔
554
      }
555
      
556
      subchannel = ((WrrSubchannel) subchannel).delegate();
1✔
557
      if (!enableOobLoadReport) {
1✔
558
        return pickResult.copyWithSubchannel(subchannel)
1✔
559
            .copyWithStreamTracerFactory(
1✔
560
                OrcaPerRequestUtil.getInstance().newOrcaClientStreamTracerFactory(
1✔
561
                    reportListeners.get(pick)));
1✔
562
      } else {
563
        return pickResult.copyWithSubchannel(subchannel);
1✔
564
      }
565
    }
566

567
    /** Returns {@code true} if weights are different than round_robin. */
568
    private boolean updateWeight(float[] newWeights) {
569
      this.scheduler = new StaticStrideScheduler(newWeights, sequence);
1✔
570
      return !this.scheduler.usesRoundRobin();
1✔
571
    }
572

573
    @Override
574
    public String toString() {
575
      return MoreObjects.toStringHelper(WeightedRoundRobinPicker.class)
1✔
576
          .add("enableOobLoadReport", enableOobLoadReport)
1✔
577
          .add("errorUtilizationPenalty", errorUtilizationPenalty)
1✔
578
          .add("pickers", pickers)
1✔
579
          .toString();
1✔
580
    }
581

582
    @VisibleForTesting
583
    List<ChildLbState> getChildren() {
584
      return children;
1✔
585
    }
586

587
    @Override
588
    public int hashCode() {
589
      return hashCode;
×
590
    }
591

592
    @Override
593
    public boolean equals(Object o) {
594
      if (!(o instanceof WeightedRoundRobinPicker)) {
1✔
595
        return false;
×
596
      }
597
      WeightedRoundRobinPicker other = (WeightedRoundRobinPicker) o;
1✔
598
      if (other == this) {
1✔
599
        return true;
×
600
      }
601
      // the lists cannot contain duplicate subchannels
602
      return hashCode == other.hashCode
1✔
603
          && sequence == other.sequence
604
          && enableOobLoadReport == other.enableOobLoadReport
605
          && Float.compare(errorUtilizationPenalty, other.errorUtilizationPenalty) == 0
1✔
606
          && pickers.size() == other.pickers.size()
1✔
607
          && new HashSet<>(pickers).containsAll(other.pickers);
1✔
608
    }
609
  }
610

611
  /*
612
   * The Static Stride Scheduler is an implementation of an earliest deadline first (EDF) scheduler
613
   * in which each object's deadline is the multiplicative inverse of the object's weight.
614
   * <p>
615
   * The way in which this is implemented is through a static stride scheduler. 
616
   * The Static Stride Scheduler works by iterating through the list of subchannel weights
617
   * and using modular arithmetic to proportionally distribute picks, favoring entries 
618
   * with higher weights. It is based on the observation that the intended sequence generated 
619
   * from an EDF scheduler is a periodic one that can be achieved through modular arithmetic. 
620
   * The Static Stride Scheduler is more performant than other implementations of the EDF
621
   * Scheduler, as it removes the need for a priority queue (and thus mutex locks).
622
   * <p>
623
   * go/static-stride-scheduler
624
   * <p>
625
   *
626
   * <ul>
627
   *  <li>nextSequence() - O(1)
628
   *  <li>pick() - O(n)
629
   */
630
  @VisibleForTesting
631
  static final class StaticStrideScheduler {
632
    private final short[] scaledWeights;
633
    private final AtomicInteger sequence;
634
    private final boolean usesRoundRobin;
635
    private static final int K_MAX_WEIGHT = 0xFFFF;
636

637
    // Assuming the mean of all known weights is M, StaticStrideScheduler will clamp
638
    // weights bigger than M*kMaxRatio and weights smaller than M*kMinRatio.
639
    //
640
    // This is done as a performance optimization by limiting the number of rounds for picks
641
    // for edge cases where channels have large differences in subchannel weights.
642
    // In this case, without these clips, it would potentially require the scheduler to
643
    // frequently traverse through the entire subchannel list within the pick method.
644
    //
645
    // The current values of 10 and 0.1 were chosen without any experimenting. It should
646
    // decrease the amount of sequences that the scheduler must traverse through in order
647
    // to pick a high weight subchannel in such corner cases.
648
    // But, it also makes WeightedRoundRobin to send slightly more requests to
649
    // potentially very bad tasks (that would have near-zero weights) than zero.
650
    // This is not necessarily a downside, though. Perhaps this is not a problem at
651
    // all, and we can increase this value if needed to save CPU cycles.
652
    private static final double K_MAX_RATIO = 10;
653
    private static final double K_MIN_RATIO = 0.1;
654

655
    StaticStrideScheduler(float[] weights, AtomicInteger sequence) {
1✔
656
      checkArgument(weights.length >= 1, "Couldn't build scheduler: requires at least one weight");
1✔
657
      int numChannels = weights.length;
1✔
658
      int numWeightedChannels = 0;
1✔
659
      double sumWeight = 0;
1✔
660
      double unscaledMeanWeight;
661
      float unscaledMaxWeight = 0;
1✔
662
      for (float weight : weights) {
1✔
663
        if (weight > 0) {
1✔
664
          sumWeight += weight;
1✔
665
          unscaledMaxWeight = Math.max(weight, unscaledMaxWeight);
1✔
666
          numWeightedChannels++;
1✔
667
        }
668
      }
669

670
      // Adjust max value s.t. ratio does not exceed K_MAX_RATIO. This should
671
      // ensure that we on average do at most K_MAX_RATIO rounds for picks.
672
      if (numWeightedChannels > 0) {
1✔
673
        unscaledMeanWeight = sumWeight / numWeightedChannels;
1✔
674
        unscaledMaxWeight = Math.min(unscaledMaxWeight, (float) (K_MAX_RATIO * unscaledMeanWeight));
1✔
675
      } else {
676
        // Fall back to round robin if all values are non-positives. Note that
677
        // numWeightedChannels == 1 also behaves like RR because the weights are all the same, but
678
        // the weights aren't 1, so it doesn't go through this path.
679
        unscaledMeanWeight = 1;
1✔
680
        unscaledMaxWeight = 1;
1✔
681
      }
682
      // We need at least two weights for WRR to be distinguishable from round_robin.
683
      usesRoundRobin = numWeightedChannels < 2;
1✔
684

685
      // Scales weights s.t. max(weights) == K_MAX_WEIGHT, meanWeight is scaled accordingly.
686
      // Note that, since we cap the weights to stay within K_MAX_RATIO, meanWeight might not
687
      // match the actual mean of the values that end up in the scheduler.
688
      double scalingFactor = K_MAX_WEIGHT / unscaledMaxWeight;
1✔
689
      // We compute weightLowerBound and clamp it to 1 from below so that in the
690
      // worst case, we represent tiny weights as 1.
691
      int weightLowerBound = (int) Math.ceil(scalingFactor * unscaledMeanWeight * K_MIN_RATIO);
1✔
692
      short[] scaledWeights = new short[numChannels];
1✔
693
      for (int i = 0; i < numChannels; i++) {
1✔
694
        if (weights[i] <= 0) {
1✔
695
          scaledWeights[i] = (short) Math.round(scalingFactor * unscaledMeanWeight);
1✔
696
        } else {
697
          int weight = (int) Math.round(scalingFactor * Math.min(weights[i], unscaledMaxWeight));
1✔
698
          scaledWeights[i] = (short) Math.max(weight, weightLowerBound);
1✔
699
        }
700
      }
701

702
      this.scaledWeights = scaledWeights;
1✔
703
      this.sequence = sequence;
1✔
704
    }
1✔
705

706
    // Without properly weighted channels, we do plain vanilla round_robin.
707
    boolean usesRoundRobin() {
708
      return usesRoundRobin;
1✔
709
    }
710

711
    /**
712
     * Returns the next sequence number and atomically increases sequence with wraparound.
713
     */
714
    private long nextSequence() {
715
      return Integer.toUnsignedLong(sequence.getAndIncrement());
1✔
716
    }
717

718
    /*
719
     * Selects index of next backend server.
720
     * <p>
721
     * A 2D array is compactly represented as a function of W(backend), where the row
722
     * represents the generation and the column represents the backend index:
723
     * X(backend,generation) | generation ∈ [0,kMaxWeight).
724
     * Each element in the conceptual array is a boolean indicating whether the backend at
725
     * this index should be picked now. If false, the counter is incremented again,
726
     * and the new element is checked. An atomically incremented counter keeps track of our
727
     * backend and generation through modular arithmetic within the pick() method.
728
     * <p>
729
     * Modular arithmetic allows us to evenly distribute picks and skips between
730
     * generations based on W(backend).
731
     * X(backend,generation) = (W(backend) * generation) % kMaxWeight >= kMaxWeight - W(backend)
732
     * If we have the same three backends with weights:
733
     * W(backend) = {2,3,6} scaled to max(W(backend)) = 6, then X(backend,generation) is:
734
     * <p>
735
     * B0    B1    B2
736
     * T     T     T
737
     * F     F     T
738
     * F     T     T
739
     * T     F     T
740
     * F     T     T
741
     * F     F     T
742
     * The sequence of picked backend indices is given by
743
     * walking across and down: {0,1,2,2,1,2,0,2,1,2,2}.
744
     * <p>
745
     * To reduce the variance and spread the wasted work among different picks,
746
     * an offset that varies per backend index is also included to the calculation.
747
     */
748
    int pick() {
749
      while (true) {
750
        long sequence = this.nextSequence();
1✔
751
        int backendIndex = (int) (sequence % scaledWeights.length);
1✔
752
        long generation = sequence / scaledWeights.length;
1✔
753
        int weight = Short.toUnsignedInt(scaledWeights[backendIndex]);
1✔
754
        long offset = (long) K_MAX_WEIGHT / 2 * backendIndex;
1✔
755
        if ((weight * generation + offset) % K_MAX_WEIGHT < K_MAX_WEIGHT - weight) {
1✔
756
          continue;
1✔
757
        }
758
        return backendIndex;
1✔
759
      }
760
    }
761
  }
762

763
  static final class WeightedRoundRobinLoadBalancerConfig {
764
    final long blackoutPeriodNanos;
765
    final long weightExpirationPeriodNanos;
766
    final boolean enableOobLoadReport;
767
    final long oobReportingPeriodNanos;
768
    final long weightUpdatePeriodNanos;
769
    final float errorUtilizationPenalty;
770
    final ImmutableList<String> metricNamesForComputingUtilization;
771

772
    public static Builder newBuilder() {
773
      return new Builder();
1✔
774
    }
775

776
    private WeightedRoundRobinLoadBalancerConfig(long blackoutPeriodNanos,
777
        long weightExpirationPeriodNanos, boolean enableOobLoadReport, long oobReportingPeriodNanos,
778
        long weightUpdatePeriodNanos, float errorUtilizationPenalty,
779
        ImmutableList<String> metricNamesForComputingUtilization) {
1✔
780
      this.blackoutPeriodNanos = blackoutPeriodNanos;
1✔
781
      this.weightExpirationPeriodNanos = weightExpirationPeriodNanos;
1✔
782
      this.enableOobLoadReport = enableOobLoadReport;
1✔
783
      this.oobReportingPeriodNanos = oobReportingPeriodNanos;
1✔
784
      this.weightUpdatePeriodNanos = weightUpdatePeriodNanos;
1✔
785
      this.errorUtilizationPenalty = errorUtilizationPenalty;
1✔
786
      this.metricNamesForComputingUtilization = metricNamesForComputingUtilization;
1✔
787
    }
1✔
788

789
    @Override
790
    public boolean equals(Object o) {
791
      if (!(o instanceof WeightedRoundRobinLoadBalancerConfig)) {
1✔
792
        return false;
1✔
793
      }
794
      WeightedRoundRobinLoadBalancerConfig that = (WeightedRoundRobinLoadBalancerConfig) o;
1✔
795
      return this.blackoutPeriodNanos == that.blackoutPeriodNanos
1✔
796
          && this.weightExpirationPeriodNanos == that.weightExpirationPeriodNanos
797
          && this.enableOobLoadReport == that.enableOobLoadReport
798
          && this.oobReportingPeriodNanos == that.oobReportingPeriodNanos
799
          && this.weightUpdatePeriodNanos == that.weightUpdatePeriodNanos
800
          // Float.compare considers NaNs equal
801
          && Float.compare(this.errorUtilizationPenalty, that.errorUtilizationPenalty) == 0
1✔
802
          && Objects.equals(this.metricNamesForComputingUtilization,
1✔
803
              that.metricNamesForComputingUtilization);
804
    }
805

806
    @Override
807
    public int hashCode() {
808
      return Objects.hash(blackoutPeriodNanos, weightExpirationPeriodNanos, enableOobLoadReport,
1✔
809
          oobReportingPeriodNanos, weightUpdatePeriodNanos, errorUtilizationPenalty,
1✔
810
          metricNamesForComputingUtilization);
811
    }
812

813
    static final class Builder {
814
      long blackoutPeriodNanos = 10_000_000_000L; // 10s
1✔
815
      long weightExpirationPeriodNanos = 180_000_000_000L; // 3min
1✔
816
      boolean enableOobLoadReport = false;
1✔
817
      long oobReportingPeriodNanos = 10_000_000_000L; // 10s
1✔
818
      long weightUpdatePeriodNanos = 1_000_000_000L; // 1s
1✔
819
      float errorUtilizationPenalty = 1.0F;
1✔
820
      ImmutableList<String> metricNamesForComputingUtilization = ImmutableList.of();
1✔
821

822
      private Builder() {
1✔
823

824
      }
1✔
825

826
      @SuppressWarnings("UnusedReturnValue")
827
      Builder setBlackoutPeriodNanos(long blackoutPeriodNanos) {
828
        this.blackoutPeriodNanos = blackoutPeriodNanos;
1✔
829
        return this;
1✔
830
      }
831

832
      @SuppressWarnings("UnusedReturnValue")
833
      Builder setWeightExpirationPeriodNanos(long weightExpirationPeriodNanos) {
834
        this.weightExpirationPeriodNanos = weightExpirationPeriodNanos;
1✔
835
        return this;
1✔
836
      }
837

838
      Builder setEnableOobLoadReport(boolean enableOobLoadReport) {
839
        this.enableOobLoadReport = enableOobLoadReport;
1✔
840
        return this;
1✔
841
      }
842

843
      Builder setOobReportingPeriodNanos(long oobReportingPeriodNanos) {
844
        this.oobReportingPeriodNanos = oobReportingPeriodNanos;
1✔
845
        return this;
1✔
846
      }
847

848
      Builder setWeightUpdatePeriodNanos(long weightUpdatePeriodNanos) {
849
        this.weightUpdatePeriodNanos = weightUpdatePeriodNanos;
1✔
850
        return this;
1✔
851
      }
852

853
      Builder setErrorUtilizationPenalty(float errorUtilizationPenalty) {
854
        this.errorUtilizationPenalty = errorUtilizationPenalty;
1✔
855
        return this;
1✔
856
      }
857

858
      Builder setMetricNamesForComputingUtilization(
859
          List<String> metricNamesForComputingUtilization) {
860
        this.metricNamesForComputingUtilization =
1✔
861
            ImmutableList.copyOf(metricNamesForComputingUtilization);
1✔
862
        return this;
1✔
863
      }
864

865
      WeightedRoundRobinLoadBalancerConfig build() {
866
        return new WeightedRoundRobinLoadBalancerConfig(blackoutPeriodNanos,
1✔
867
            weightExpirationPeriodNanos, enableOobLoadReport, oobReportingPeriodNanos,
868
            weightUpdatePeriodNanos, errorUtilizationPenalty, metricNamesForComputingUtilization);
869
      }
870
    }
871
  }
872
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc