• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

grpc / grpc-java / #19481

30 Sep 2024 03:17PM UTC coverage: 84.554% (-0.03%) from 84.584%
#19481

push

github

ejona86
xds: Have ClusterManagerLB use child map for preserving children

Instead of doing a dance of supplementing config so the later
createChildAddressesMap() won't delete children, just look at the
existing children and don't delete any that shouldn't be deleted.

33645 of 39791 relevant lines covered (84.55%)

0.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.99
/../xds/src/main/java/io/grpc/xds/ClusterManagerLoadBalancer.java
1
/*
2
 * Copyright 2020 The gRPC Authors
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
package io.grpc.xds;
18

19
import static com.google.common.base.Preconditions.checkNotNull;
20
import static io.grpc.ConnectivityState.TRANSIENT_FAILURE;
21

22
import com.google.common.annotations.VisibleForTesting;
23
import com.google.common.base.MoreObjects;
24
import io.grpc.ConnectivityState;
25
import io.grpc.InternalLogId;
26
import io.grpc.LoadBalancer;
27
import io.grpc.Status;
28
import io.grpc.SynchronizationContext;
29
import io.grpc.SynchronizationContext.ScheduledHandle;
30
import io.grpc.util.GracefulSwitchLoadBalancer;
31
import io.grpc.util.MultiChildLoadBalancer;
32
import io.grpc.xds.ClusterManagerLoadBalancerProvider.ClusterManagerConfig;
33
import io.grpc.xds.client.XdsLogger;
34
import io.grpc.xds.client.XdsLogger.XdsLogLevel;
35
import java.util.HashMap;
36
import java.util.Map;
37
import java.util.Map.Entry;
38
import java.util.concurrent.ScheduledExecutorService;
39
import java.util.concurrent.TimeUnit;
40
import javax.annotation.Nullable;
41

42
/**
43
 * The top-level load balancing policy for use in XDS.
44
 * This policy does not immediately delete its children.  Instead, it marks them deactivated
45
 * and starts a timer for deletion.  If a subsequent address update restores the child, then it is
46
 * simply reactivated instead of built from scratch.  This is necessary because XDS can frequently
47
 * remove and then add back a server as machines are rebooted or repurposed for load management.
48
 *
49
 * <p>Note that this LB does not automatically reconnect children that go into the IDLE state.
50
 */
51
class ClusterManagerLoadBalancer extends MultiChildLoadBalancer {
52

53
  // 15 minutes is long enough for a reboot and the services to restart while not so long that
54
  // many children are waiting for cleanup.
55
  @VisibleForTesting
56
  public static final int DELAYED_CHILD_DELETION_TIME_MINUTES = 15;
57
  protected final SynchronizationContext syncContext;
58
  private final ScheduledExecutorService timeService;
59
  private final XdsLogger logger;
60
  private ResolvedAddresses lastResolvedAddresses;
61

62
  ClusterManagerLoadBalancer(Helper helper) {
63
    super(helper);
1✔
64
    this.syncContext = checkNotNull(helper.getSynchronizationContext(), "syncContext");
1✔
65
    this.timeService = checkNotNull(helper.getScheduledExecutorService(), "timeService");
1✔
66
    logger = XdsLogger.withLogId(
1✔
67
        InternalLogId.allocate("cluster_manager-lb", helper.getAuthority()));
1✔
68

69
    logger.log(XdsLogLevel.INFO, "Created");
1✔
70
  }
1✔
71

72
  @Override
73
  protected ChildLbState createChildLbState(Object key) {
74
    return new ClusterManagerLbState(key, GracefulSwitchLoadBalancerFactory.INSTANCE);
1✔
75
  }
76

77
  @Override
78
  protected Map<Object, ResolvedAddresses> createChildAddressesMap(
79
      ResolvedAddresses resolvedAddresses) {
80
    lastResolvedAddresses = resolvedAddresses;
1✔
81

82
    ClusterManagerConfig config = (ClusterManagerConfig)
1✔
83
        resolvedAddresses.getLoadBalancingPolicyConfig();
1✔
84
    Map<Object, ResolvedAddresses> childAddresses = new HashMap<>();
1✔
85

86
    // Reactivate children with config; deactivate children without config
87
    for (ChildLbState rawState : getChildLbStates()) {
1✔
88
      ClusterManagerLbState state = (ClusterManagerLbState) rawState;
1✔
89
      if (config.childPolicies.containsKey(state.getKey())) {
1✔
90
        // Active child
91
        if (state.deletionTimer != null) {
1✔
92
          state.reactivateChild();
1✔
93
        }
94
      } else {
95
        // Inactive child
96
        if (state.deletionTimer == null) {
1✔
97
          state.deactivateChild();
1✔
98
        }
99
        if (state.deletionTimer.isPending()) {
1✔
100
          childAddresses.put(state.getKey(), null); // Preserve child, without config update
1✔
101
        }
102
      }
103
    }
1✔
104

105
    for (Map.Entry<String, Object> childPolicy : config.childPolicies.entrySet()) {
1✔
106
      ResolvedAddresses addresses = resolvedAddresses.toBuilder()
1✔
107
          .setLoadBalancingPolicyConfig(childPolicy.getValue())
1✔
108
          .build();
1✔
109
      childAddresses.put(childPolicy.getKey(), addresses);
1✔
110
    }
1✔
111
    logger.log(
1✔
112
        XdsLogLevel.INFO,
113
        "Received cluster_manager lb config: child names={0}", config.childPolicies.keySet());
1✔
114
    return childAddresses;
1✔
115
  }
116

117
  /**
118
   * Using the state of all children will calculate the current connectivity state,
119
   * update currentConnectivityState, generate a picker and then call
120
   * {@link Helper#updateBalancingState(ConnectivityState, SubchannelPicker)}.
121
   */
122
  @Override
123
  protected void updateOverallBalancingState() {
124
    ConnectivityState overallState = null;
1✔
125
    final Map<Object, SubchannelPicker> childPickers = new HashMap<>();
1✔
126
    for (ChildLbState childLbState : getChildLbStates()) {
1✔
127
      if (((ClusterManagerLbState) childLbState).deletionTimer != null) {
1✔
128
        continue;
1✔
129
      }
130
      childPickers.put(childLbState.getKey(), childLbState.getCurrentPicker());
1✔
131
      overallState = aggregateState(overallState, childLbState.getCurrentState());
1✔
132
    }
1✔
133

134
    if (overallState != null) {
1✔
135
      getHelper().updateBalancingState(overallState, getSubchannelPicker(childPickers));
1✔
136
      currentConnectivityState = overallState;
1✔
137
    }
138
  }
1✔
139

140
  protected SubchannelPicker getSubchannelPicker(Map<Object, SubchannelPicker> childPickers) {
141
    return new SubchannelPicker() {
1✔
142
      @Override
143
      public PickResult pickSubchannel(PickSubchannelArgs args) {
144
        String clusterName =
1✔
145
            args.getCallOptions().getOption(XdsNameResolver.CLUSTER_SELECTION_KEY);
1✔
146
        SubchannelPicker childPicker = childPickers.get(clusterName);
1✔
147
        if (childPicker == null) {
1✔
148
          return
1✔
149
              PickResult.withError(
1✔
150
                  Status.UNAVAILABLE.withDescription("CDS encountered error: unable to find "
1✔
151
                      + "available subchannel for cluster " + clusterName));
152
        }
153
        return childPicker.pickSubchannel(args);
1✔
154
      }
155

156
      @Override
157
      public String toString() {
158
        return MoreObjects.toStringHelper(this).add("pickers", childPickers).toString();
×
159
      }
160
    };
161
  }
162

163
  @Override
164
  public void handleNameResolutionError(Status error) {
165
    logger.log(XdsLogLevel.WARNING, "Received name resolution error: {0}", error);
1✔
166
    boolean gotoTransientFailure = true;
1✔
167
    for (ChildLbState state : getChildLbStates()) {
1✔
168
      if (((ClusterManagerLbState) state).deletionTimer == null) {
1✔
169
        gotoTransientFailure = false;
1✔
170
        state.getLb().handleNameResolutionError(error);
1✔
171
      }
172
    }
1✔
173
    if (gotoTransientFailure) {
1✔
174
      getHelper().updateBalancingState(
1✔
175
          TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(error)));
1✔
176
    }
177
  }
1✔
178

179
  /**
180
   * This differs from the base class in the use of the deletion timer.  When it is deactivated,
181
   * rather than immediately calling shutdown it starts a timer.  If shutdown or reactivate
182
   * are called before the timer fires, the timer is canceled.  Otherwise, time timer calls shutdown
183
   * and removes the child from the petiole policy when it is triggered.
184
   */
185
  private class ClusterManagerLbState extends ChildLbState {
1✔
186
    @Nullable
187
    ScheduledHandle deletionTimer;
188

189
    public ClusterManagerLbState(Object key, LoadBalancer.Factory policyFactory) {
1✔
190
      super(key, policyFactory);
1✔
191
    }
1✔
192

193
    @Override
194
    protected ChildLbStateHelper createChildHelper() {
195
      return new ClusterManagerChildHelper();
1✔
196
    }
197

198
    @Override
199
    protected void shutdown() {
200
      if (deletionTimer != null) {
1✔
201
        deletionTimer.cancel();
1✔
202
        deletionTimer = null;
1✔
203
      }
204
      super.shutdown();
1✔
205
    }
1✔
206

207
    void reactivateChild() {
208
      assert deletionTimer != null;
1✔
209
      deletionTimer.cancel();
1✔
210
      deletionTimer = null;
1✔
211
      logger.log(XdsLogLevel.DEBUG, "Child balancer {0} reactivated", getKey());
1✔
212
    }
1✔
213

214
    void deactivateChild() {
215
      assert deletionTimer == null;
1✔
216

217
      class DeletionTask implements Runnable {
1✔
218

219
        @Override
220
        public void run() {
221
          acceptResolvedAddresses(lastResolvedAddresses);
1✔
222
        }
1✔
223
      }
224

225
      deletionTimer =
1✔
226
          syncContext.schedule(
1✔
227
              new DeletionTask(),
228
              DELAYED_CHILD_DELETION_TIME_MINUTES,
229
              TimeUnit.MINUTES,
230
              timeService);
1✔
231
      logger.log(XdsLogLevel.DEBUG, "Child balancer {0} deactivated", getKey());
1✔
232
    }
1✔
233

234
    private class ClusterManagerChildHelper extends ChildLbStateHelper {
1✔
235
      @Override
236
      public void updateBalancingState(final ConnectivityState newState,
237
                                       final SubchannelPicker newPicker) {
238
        if (getCurrentState() == ConnectivityState.SHUTDOWN) {
1✔
239
          return;
1✔
240
        }
241

242
        // Subchannel picker and state are saved, but will only be propagated to the channel
243
        // when the child instance exits deactivated state.
244
        setCurrentState(newState);
1✔
245
        setCurrentPicker(newPicker);
1✔
246
        // If we are already in the process of resolving addresses, the overall balancing state
247
        // will be updated at the end of it, and we don't need to trigger that update here.
248
        if (deletionTimer == null && !resolvingAddresses) {
1✔
249
          updateOverallBalancingState();
1✔
250
        }
251
      }
1✔
252
    }
253
  }
254

255
  static final class GracefulSwitchLoadBalancerFactory extends LoadBalancer.Factory {
1✔
256
    static final LoadBalancer.Factory INSTANCE = new GracefulSwitchLoadBalancerFactory();
1✔
257

258
    @Override
259
    public LoadBalancer newLoadBalancer(LoadBalancer.Helper helper) {
260
      return new GracefulSwitchLoadBalancer(helper);
1✔
261
    }
262
  }
263
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc