adamant-kite-43734
02/22/2024, 5:40 PMminiature-advantage-78722
02/22/2024, 6:10 PMminiature-advantage-78722
02/22/2024, 6:14 PMkubectl logs system-upgrade-controller-75b6cc967f-vg275 -n cattle-system -f
W0222 15:26:53.512430 1 client_config.go:615] Neither --kubeconfig nor --master was specified. Using the inClusterConfig. This might not work.
time="2024-02-22T15:26:54Z" level=info msg="Applying CRD plans.upgrade.cattle.io"
time="2024-02-22T15:26:55Z" level=info msg="Starting /v1, Kind=Node controller"
time="2024-02-22T15:26:55Z" level=info msg="Starting /v1, Kind=Secret controller"
time="2024-02-22T15:26:55Z" level=info msg="Starting batch/v1, Kind=Job controller"
time="2024-02-22T15:26:55Z" level=info msg="Starting upgrade.cattle.io/v1, Kind=Plan controller"
time="2024-02-22T15:31:00Z" level=error msg="error syncing 'cattle-system/apply-hvst-upgrade-z2m89-prepare-on-hm11-with-426793949a8-5abc4': handler system-upgrade-controller: jobs.batch \"apply-hvst-upgrade-z2m89-prepare-on-hm11-with-426793949a8-5abc4\" not found, requeuing"
time="2024-02-22T15:39:14Z" level=error msg="error syncing 'cattle-system/apply-hvst-upgrade-z2m89-prepare-on-hm7-with-426793949a80-675b3': handler system-upgrade-controller: jobs.batch \"apply-hvst-upgrade-z2m89-prepare-on-hm7-with-426793949a80-675b3\" not found, requeuing"
time="2024-02-22T15:41:31Z" level=error msg="error syncing 'cattle-system/apply-system-agent-upgrader-on-hm12-with-0924652264babcfe-6a91d': handler system-upgrade-controller: jobs.batch \"apply-system-agent-upgrader-on-hm12-with-0924652264babcfe-6a91d\" not found, requeuing"
time="2024-02-22T15:41:32Z" level=error msg="error syncing 'cattle-system/apply-system-agent-upgrader-on-hm3-with-0924652264babcfe0-74af3': handler system-upgrade-controller: jobs.batch \"apply-system-agent-upgrader-on-hm3-with-0924652264babcfe0-74af3\" not found, requeuing"
time="2024-02-22T15:41:32Z" level=error msg="error syncing 'cattle-system/apply-system-agent-upgrader-on-hm2-with-0924652264babcfe0-20689': handler system-upgrade-controller: jobs.batch \"apply-system-agent-upgrader-on-hm2-with-0924652264babcfe0-20689\" not found, requeuing"
time="2024-02-22T15:41:33Z" level=error msg="error syncing 'cattle-system/apply-system-agent-upgrader-on-hm11-with-0924652264babcfe-6a55e': handler system-upgrade-controller: jobs.batch \"apply-system-agent-upgrader-on-hm11-with-0924652264babcfe-6a55e\" not found, requeuing"
time="2024-02-22T15:41:33Z" level=error msg="error syncing 'cattle-system/apply-system-agent-upgrader-on-hm8-with-0924652264babcfe0-240e8': handler system-upgrade-controller: jobs.batch \"apply-system-agent-upgrader-on-hm8-with-0924652264babcfe0-240e8\" not found, requeuing"
time="2024-02-22T15:41:34Z" level=error msg="error syncing 'cattle-system/apply-system-agent-upgrader-on-hm9-with-0924652264babcfe0-bfce2': handler system-upgrade-controller: jobs.batch \"apply-system-agent-upgrader-on-hm9-with-0924652264babcfe0-bfce2\" not found, requeuing"
time="2024-02-22T15:41:43Z" level=error msg="error syncing 'cattle-system/apply-system-agent-upgrader-on-hm4-with-0924652264babcfe0-69bfe': handler system-upgrade-controller: jobs.batch \"apply-system-agent-upgrader-on-hm4-with-0924652264babcfe0-69bfe\" not found, requeuing"
time="2024-02-22T15:41:47Z" level=error msg="error syncing 'cattle-system/apply-system-agent-upgrader-on-hm10-with-0924652264babcfe-17432': handler system-upgrade-controller: jobs.batch \"apply-system-agent-upgrader-on-hm10-with-0924652264babcfe-17432\" not found, requeuing"
time="2024-02-22T15:41:48Z" level=error msg="error syncing 'cattle-system/apply-system-agent-upgrader-on-hm6-with-0924652264babcfe0-fcc4e': handler system-upgrade-controller: jobs.batch \"apply-system-agent-upgrader-on-hm6-with-0924652264babcfe0-fcc4e\" not found, requeuing"
miniature-advantage-78722
02/22/2024, 6:43 PMkubectl get upgrades -A
NAMESPACE NAME AGE
harvester-system hvst-upgrade-6mt6l 268d
harvester-system hvst-upgrade-z2m89 4h52m
miniature-advantage-78722
02/22/2024, 7:41 PM2024/02/22 19:37:09 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:14 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:19 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:24 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:29 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:34 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:39 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:44 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:49 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:54 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:37:59 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:04 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:09 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:14 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:19 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:24 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:29 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:34 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:39 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:44 [ERROR] Failed to dial steve aggregation server: EOF
2024/02/22 19:38:49 [ERROR] Failed to dial steve aggregation server: EOF
miniature-advantage-78722
02/22/2024, 9:58 PMupgradeHandler.OnChanged
go
func (h *upgradeHandler) OnChanged(key string, upgrade *harvesterv1.Upgrade) (*harvesterv1.Upgrade, error) {
if upgrade == nil || upgrade.DeletionTimestamp != nil {
return upgrade, nil
}
upgradeControllerLock.Lock()
defer upgradeControllerLock.Unlock()
repo := NewUpgradeRepo(h.ctx, upgrade, h)
if harvesterv1.UpgradeCompleted.GetStatus(upgrade) == "" {
logrus.Infof("Initialize upgrade %s/%s", upgrade.Namespace, upgrade.Name)
if err := h.resetLatestUpgradeLabel(upgrade.Name); err != nil {
return nil, err
}
toUpdate := upgrade.DeepCopy()
initStatus(toUpdate)
if !upgrade.Spec.LogEnabled {
<http://logrus.Info|logrus.Info>("Upgrade observability is administratively disabled")
setLogReadyCondition(toUpdate, corev1.ConditionFalse, "Disabled", "Upgrade observability is administratively disabled")
toUpdate.Labels[upgradeStateLabel] = StateLoggingInfraPrepared
return h.upgradeClient.Update(toUpdate)
}
<http://logrus.Info|logrus.Info>("Enabling upgrade observability")
upgradeLog, err := h.upgradeLogClient.Create(prepareUpgradeLog(upgrade))
if err != nil && !apierrors.IsAlreadyExists(err) {
logrus.Warn("Failed to create the upgradeLog resource")
setLogReadyCondition(toUpdate, corev1.ConditionFalse, err.Error(), "")
} else {
toUpdate.Status.UpgradeLog = upgradeLog.Name
}
harvesterv1.LogReady.CreateUnknownIfNotExists(toUpdate)
return h.upgradeClient.Update(toUpdate)
}
if (harvesterv1.LogReady.IsTrue(upgrade) || harvesterv1.LogReady.IsFalse(upgrade)) && harvesterv1.ImageReady.GetStatus(upgrade) == "" {
<http://logrus.Info|logrus.Info>("Creating upgrade repo image")
toUpdate := upgrade.DeepCopy()
if upgrade.Spec.Image == "" {
version, err := h.versionCache.Get(h.namespace, upgrade.Spec.Version)
if err != nil {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
image, err := repo.CreateImageFromISO(version.Spec.ISOURL, version.Spec.ISOChecksum)
if err != nil && apierrors.IsAlreadyExists(err) {
image, err = h.vmImageClient.Get(harvesterSystemNamespace, upgrade.Name, metav1.GetOptions{})
if err != nil {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
logrus.Infof("Reuse the existing image: %s/%s", image.Namespace, image.Name)
} else if err != nil && !apierrors.IsAlreadyExists(err) {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
toUpdate.Status.ImageID = fmt.Sprintf("%s/%s", image.Namespace, image.Name)
} else {
image, err := repo.GetImage(upgrade.Spec.Image)
if err != nil {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
toUpdate.Status.ImageID = fmt.Sprintf("%s/%s", image.Namespace, image.Name)
// The image might not be imported yet. Set upgrade label and let
// vmImageHandler deal with it.
imageUpdate := image.DeepCopy()
if imageUpdate.Labels == nil {
imageUpdate.Labels = map[string]string{}
}
imageUpdate.Labels[harvesterUpgradeLabel] = upgrade.Name
if _, err := h.vmImageClient.Update(imageUpdate); err != nil {
return nil, err
}
}
harvesterv1.ImageReady.CreateUnknownIfNotExists(toUpdate)
return h.upgradeClient.Update(toUpdate)
}
logrus.Infof("handle upgrade %s/%s with labels %v", upgrade.Namespace, upgrade.Name, upgrade.Labels)
// only run further operations for latest upgrade
if upgrade.Labels == nil || upgrade.Labels[harvesterLatestUpgradeLabel] != "true" {
logrus.Infof("skipping old upgrade %s/%s", upgrade.Namespace, upgrade.Name)
return upgrade, nil
}
// clean upgrade repo VMs and images if a upgrade succeeds or fails.
if harvesterv1.UpgradeCompleted.IsTrue(upgrade) || harvesterv1.UpgradeCompleted.IsFalse(upgrade) {
s := "completed"
if harvesterv1.UpgradeCompleted.IsFalse(upgrade) {
s = "failed"
}
logrus.Infof("handling %s upgrade %s/%s", s, upgrade.Namespace, upgrade.Name)
return nil, h.cleanup(upgrade, harvesterv1.UpgradeCompleted.IsTrue(upgrade))
}
if harvesterv1.ImageReady.IsTrue(upgrade) && harvesterv1.RepoProvisioned.GetStatus(upgrade) == "" {
<http://logrus.Info|logrus.Info>("Starting upgrade repo VM")
toUpdate := upgrade.DeepCopy()
if err := repo.Bootstrap(); err != nil && !apierrors.IsAlreadyExists(err) {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
toUpdate.Labels[upgradeStateLabel] = StatePreparingRepo
harvesterv1.RepoProvisioned.CreateUnknownIfNotExists(toUpdate)
return h.upgradeClient.Update(toUpdate)
}
if harvesterv1.RepoProvisioned.IsTrue(upgrade) && harvesterv1.NodesPrepared.GetStatus(upgrade) == "" {
logrus.Infof("prepping nodes %s/%s", upgrade.Namespace, upgrade.Name)
toUpdate := upgrade.DeepCopy()
singleNode, err := h.isSingleNodeCluster()
if err != nil {
return nil, err
}
toUpdate.Status.SingleNode = singleNode
backoff := wait.Backoff{
Steps: 30,
Duration: 10 * time.Second,
Factor: 1.0,
Jitter: 0.1,
}
var repoInfo *RepoInfo
if err := retry.OnError(backoff, util.IsRetriableNetworkError, func() error {
repoInfo, err = repo.getInfo()
if err != nil {
logrus.Warnf("Repo info retrieval failed with: %s", err)
return err
}
return nil
}); err != nil {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
repoInfoStr, err := repoInfo.Marshall()
if err != nil {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
<http://logrus.Info|logrus.Info>("Check minimum upgradable version")
if err := isVersionUpgradable(toUpdate.Status.PreviousVersion, repoInfo.Release.MinUpgradableVersion); err != nil {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
logrus.Debug("Start preparing nodes for upgrade")
if _, err := h.planClient.Create(preparePlan(upgrade)); err != nil && !apierrors.IsAlreadyExists(err) {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
toUpdate.Labels[upgradeStateLabel] = StatePreparingNodes
toUpdate.Status.RepoInfo = repoInfoStr
harvesterv1.NodesPrepared.CreateUnknownIfNotExists(toUpdate)
return h.upgradeClient.Update(toUpdate)
}
if harvesterv1.NodesPrepared.IsTrue(upgrade) && harvesterv1.SystemServicesUpgraded.GetStatus(upgrade) == "" {
logrus.Infof("upgrading system services %s/%s", upgrade.Namespace, upgrade.Name)
toUpdate := upgrade.DeepCopy()
repoInfo, err := getCachedRepoInfo(upgrade)
if err != nil {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
if _, err := h.jobClient.Create(applyManifestsJob(upgrade, repoInfo)); err != nil && !apierrors.IsAlreadyExists(err) {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
toUpdate.Labels[upgradeStateLabel] = StateUpgradingSystemServices
setHelmChartUpgradeStatus(toUpdate, corev1.ConditionUnknown, "", "")
return h.upgradeClient.Update(toUpdate)
}
logrus.Infof("testing upgrade nodes %s/%s: (%v, %v)", upgrade.Namespace, upgrade.Name, harvesterv1.SystemServicesUpgraded.IsTrue(upgrade), harvesterv1.NodesUpgraded.GetStatus(upgrade))
if harvesterv1.SystemServicesUpgraded.IsTrue(upgrade) && harvesterv1.NodesUpgraded.GetStatus(upgrade) == "" {
logrus.Infof("upgrading nodes %s/%s", upgrade.Namespace, upgrade.Name)
info, err := getCachedRepoInfo(upgrade)
if err != nil {
logrus.Errorf("failed to get cached repo %s/%s: %v", upgrade.Namespace, upgrade.Name, err)
return nil, err
}
toUpdate := upgrade.DeepCopy()
singleNodeName := upgrade.Status.SingleNode
if singleNodeName != "" {
<http://logrus.Info|logrus.Info>("Start single node upgrade job")
if _, err = h.jobClient.Create(applyNodeJob(upgrade, info, singleNodeName, upgradeJobTypeSingleNodeUpgrade)); err != nil && !apierrors.IsAlreadyExists(err) {
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
} else {
// save the original value of replica-replenishment-wait-interval setting and extend it with a longer value
// skip if the value is already larger than extendedReplicaReplenishmentWaitInterval
replicaReplenishmentWaitIntervalValue, err := h.getReplicaReplenishmentValue()
if err != nil {
logrus.Errorf("failed to get replica replenishment %s/%s: %v", upgrade.Namespace, upgrade.Name, err)
return nil, err
}
if replicaReplenishmentWaitIntervalValue < extendedReplicaReplenishmentWaitInterval {
if err := h.saveReplicaReplenishmentToUpgradeAnnotation(toUpdate); err != nil {
return nil, err
}
if err := h.setReplicaReplenishmentValue(extendedReplicaReplenishmentWaitInterval); err != nil {
logrus.Errorf("failed to set replica replenishment %s/%s: %v", upgrade.Namespace, upgrade.Name, err)
return nil, err
}
}
// go with RKE2 pre-drain/post-drain hooks
logrus.Infof("Start upgrading Kubernetes runtime to %s", info.Release.Kubernetes)
if err := h.upgradeKubernetes(info.Release.Kubernetes); err != nil {
logrus.Errorf("failed to upgrade k8s %s/%s: %v", upgrade.Namespace, upgrade.Name, err)
setUpgradeCompletedCondition(toUpdate, StateFailed, corev1.ConditionFalse, err.Error(), "")
return h.upgradeClient.Update(toUpdate)
}
}
logrus.Infof("marking current state for upgrading nodes %s/%s", upgrade.Namespace, upgrade.Name)
toUpdate.Labels[upgradeStateLabel] = StateUpgradingNodes
harvesterv1.NodesUpgraded.CreateUnknownIfNotExists(toUpdate)
return h.upgradeClient.Update(toUpdate)
}
logrus.Infof("no conditions met %s/%s", upgrade.Namespace, upgrade.Name)
return upgrade, nil
}
miniature-advantage-78722
02/22/2024, 9:58 PMtime="2024-02-22T21:49:38Z" level=info msg="handle upgrade harvester-system/hvst-upgrade-z2m89 with labels map[harvesterhci.io/latestUpgrade:true harvesterhci.io/upgradeState:UpgradingNodes]"
time="2024-02-22T21:49:38Z" level=info msg="testing upgrade nodes harvester-system/hvst-upgrade-z2m89: (true, Unknown)"
time="2024-02-22T21:49:38Z" level=info msg="no conditions met harvester-system/hvst-upgrade-z2m89"
time="2024-02-22T21:51:17Z" level=info msg="handle upgrade harvester-system/hvst-upgrade-z2m89 with labels map[harvesterhci.io/latestUpgrade:true harvesterhci.io/upgradeState:UpgradingNodes]"
time="2024-02-22T21:51:17Z" level=info msg="testing upgrade nodes harvester-system/hvst-upgrade-z2m89: (true, )"
time="2024-02-22T21:51:17Z" level=info msg="upgrading nodes harvester-system/hvst-upgrade-z2m89"
time="2024-02-22T21:51:17Z" level=info msg="Start upgrading Kubernetes runtime to v1.25.9+rke2r1"
time="2024-02-22T21:51:17Z" level=info msg="marking current state for upgrading nodes harvester-system/hvst-upgrade-z2m89"
time="2024-02-22T21:51:17Z" level=info msg="handle upgrade harvester-system/hvst-upgrade-z2m89 with labels map[harvesterhci.io/latestUpgrade:true harvesterhci.io/upgradeState:UpgradingNodes]"
time="2024-02-22T21:51:17Z" level=info msg="testing upgrade nodes harvester-system/hvst-upgrade-z2m89: (true, Unknown)"
time="2024-02-22T21:51:17Z" level=info msg="no conditions met harvester-system/hvst-upgrade-z2m89"
rke config
spec:
  kubernetesVersion: v1.25.9+rke2r1
  localClusterAuthEndpoint: {}
  rkeConfig:
    chartValues: null
    machineGlobalConfig: null
    provisionGeneration: 3
    upgradeStrategy:
      controlPlaneConcurrency: "1"
      controlPlaneDrainOptions:
        deleteEmptyDirData: true
        disableEviction: false
        enabled: true
        force: true
        gracePeriod: 0
        ignoreDaemonSets: true
        postDrainHooks:
        - annotation: harvesterhci.io/post-hook
        preDrainHooks:
        - annotation: harvesterhci.io/pre-hook
        skipWaitForDeleteTimeoutSeconds: 0
        timeout: 0
      workerConcurrency: "1"
      workerDrainOptions:
        deleteEmptyDirData: true
        disableEviction: false
        enabled: true
        force: true
        gracePeriod: 0
        ignoreDaemonSets: true
        postDrainHooks:
        - annotation: harvesterhci.io/post-hook
        preDrainHooks:
        - annotation: harvesterhci.io/pre-hook
        skipWaitForDeleteTimeoutSeconds: 0
        timeout: 0