
calm-egg-14566

07/30/2022, 3:25 AM
Hi everyone! I'm stuck deploying an RKE cluster to a VM created with Vagrant. When I run
rke up
it fails with different error messages. Here is my
cluster.yml
# maintainer: nick.reva@snap.com
nodes: # !DO_NOT_UPDATE
- address: 192.168.50.101
  port: "22"
  internal_address: 192.168.50.101
  role:
  - controlplane
  - etcd
  - worker
  hostname_override: node1
  user: root
  docker_socket: /var/run/docker.sock
  ssh_key: ""
  ssh_key_path: ~/.ssh/id_rsa
  ssh_cert: ""
  ssh_cert_path: ""
  labels: {}
  taints: []
#- address: 192.168.50.102
#  port: "22"
#  internal_address: 192.168.50.102
#  role:
#  - worker
#  hostname_override: node2
#  user: root
#  docker_socket: /var/run/docker.sock
#  ssh_key: ""
#  ssh_key_path: ~/.ssh/id_rsa
#  ssh_cert: ""
#  ssh_cert_path: ""
#  labels: {}
#  taints: []
services:
  etcd:
    image: ""
    extra_args: {}
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
    external_urls: []
    ca_cert: ""
    cert: ""
    key: ""
    path: ""
    uid: 52034
    gid: 52034
    snapshot: null
    retention: ""
    creation: ""
    backup_config: null
  kube-api:
    image: ""
    extra_args: {}
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
    service_cluster_ip_range: 10.43.0.0/16
    service_node_port_range: ""
    pod_security_policy: true
    always_pull_images: false
    secrets_encryption_config:
      enabled: true
    audit_log:
      enabled: true
    admission_configuration:
    event_rate_limit:
      enabled: true
  kube-controller:
    image: ""
    extra_args: #TODO 
      feature-gates: RotateKubeletServerCertificate=true
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
    cluster_cidr: 10.42.0.0/16
    service_cluster_ip_range: 10.43.0.0/16
  scheduler:
    image: ""
    extra_args: {}
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
  kubelet:
    image: ""
    extra_args: #TODO
      feature-gates: RotateKubeletServerCertificate=true
      protect-kernel-defaults: "true"
      tls-cipher-suites: TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
    cluster_domain: cluster.local
    infra_container_image: ""
    cluster_dns_server: 10.43.0.10
    fail_swap_on: false
    generate_serving_certificate: true
  kubeproxy:
    image: ""
    extra_args: {}
    extra_binds: []
    extra_env: []
    win_extra_args: {}
    win_extra_binds: []
    win_extra_env: []
network:
  plugin: calico # !DO_NOT_UPDATE
  options: {}
  mtu: 0
  node_selector: {}
  update_strategy: null
  tolerations: [] # !DO_NOT_UPDATE
authentication: # !DO_NOT_UPDATE
  strategy: x509 # !DO_NOT_UPDATE
  sans: []
  webhook: null
addons: |
  ---
  apiVersion: v1
  kind: Namespace
  metadata:
    name: ingress-nginx
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: Role
  metadata:
    name: default-psp-role
    namespace: ingress-nginx
  rules:
  - apiGroups:
    - extensions
    resourceNames:
    - default-psp
    resources:
    - podsecuritypolicies
    verbs:
    - use
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: RoleBinding
  metadata:
    name: default-psp-rolebinding
    namespace: ingress-nginx
  roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: Role
    name: default-psp-role
  subjects:
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    name: system:serviceaccounts
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    name: system:authenticated
  ---
  apiVersion: v1
  kind: Namespace
  metadata:
    name: cattle-system
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: Role
  metadata:
    name: default-psp-role
    namespace: cattle-system
  rules:
  - apiGroups:
    - extensions
    resourceNames:
    - default-psp
    resources:
    - podsecuritypolicies
    verbs:
    - use
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: RoleBinding
  metadata:
    name: default-psp-rolebinding
    namespace: cattle-system
  roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: Role
    name: default-psp-role
  subjects:
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    name: system:serviceaccounts
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    name: system:authenticated
  ---
  apiVersion: policy/v1beta1
  kind: PodSecurityPolicy
  metadata:
    name: restricted
  spec:
    requiredDropCapabilities:
    - NET_RAW
    privileged: false
    allowPrivilegeEscalation: false
    defaultAllowPrivilegeEscalation: false
    fsGroup:
      rule: RunAsAny
    runAsUser:
      rule: MustRunAsNonRoot
    seLinux:
      rule: RunAsAny
    supplementalGroups:
      rule: RunAsAny
    volumes:
    - emptyDir
    - secret
    - persistentVolumeClaim
    - downwardAPI
    - configMap
    - projected
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRole
  metadata:
    name: psp:restricted
  rules:
  - apiGroups:
    - extensions
    resourceNames:
    - restricted
    resources:
    - podsecuritypolicies
    verbs:
    - use
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRoleBinding
  metadata:
    name: psp:restricted
  roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: psp:restricted
  subjects:
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    name: system:serviceaccounts
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    name: system:authenticated
  ---
  apiVersion: v1
  kind: ServiceAccount
  metadata:
    name: tiller
    namespace: kube-system
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRoleBinding
  metadata:
    name: tiller
  roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: cluster-admin
  subjects:
  - kind: ServiceAccount
    name: tiller
    namespace: kube-system
addons_include: []
system_images: # !DO_NOT_UPDATE
  etcd: rancher/coreos-etcd:v3.4.14-rancher1
  alpine: rancher/rke-tools:v0.1.72
  nginx_proxy: rancher/rke-tools:v0.1.72
  cert_downloader: rancher/rke-tools:v0.1.72
  kubernetes_services_sidecar: rancher/rke-tools:v0.1.72
  kubedns: rancher/k8s-dns-kube-dns:1.15.10
  dnsmasq: rancher/k8s-dns-dnsmasq-nanny:1.15.10
  kubedns_sidecar: rancher/k8s-dns-sidecar:1.15.10
  kubedns_autoscaler: rancher/cluster-proportional-autoscaler:1.8.1
  coredns: rancher/coredns-coredns:1.8.0
  coredns_autoscaler: rancher/cluster-proportional-autoscaler:1.8.1
  nodelocal: rancher/k8s-dns-node-cache:1.15.13
  kubernetes: rancher/hyperkube:v1.20.4-rancher1
  flannel: rancher/coreos-flannel:v0.13.0-rancher1
  flannel_cni: rancher/flannel-cni:v0.3.0-rancher6
  calico_node: rancher/calico-node:v3.17.2
  calico_cni: rancher/calico-cni:v3.17.2
  calico_controllers: rancher/calico-kube-controllers:v3.17.2
  calico_ctl: rancher/calico-ctl:v3.17.2
  calico_flexvol: rancher/calico-pod2daemon-flexvol:v3.17.2
  canal_node: rancher/calico-node:v3.17.2
  canal_cni: rancher/calico-cni:v3.17.2
  canal_controllers: rancher/calico-kube-controllers:v3.17.2
  canal_flannel: rancher/coreos-flannel:v0.13.0-rancher1
  canal_flexvol: rancher/calico-pod2daemon-flexvol:v3.17.2
  weave_node: weaveworks/weave-kube:2.8.1
  weave_cni: weaveworks/weave-npc:2.8.1
  pod_infra_container: rancher/pause:3.2
  ingress: rancher/nginx-ingress-controller:nginx-0.43.0-rancher1
  ingress_backend: rancher/nginx-ingress-controller-defaultbackend:1.5-rancher1
  metrics_server: rancher/metrics-server:v0.4.1
  windows_pod_infra_container: rancher/kubelet-pause:v0.1.6
  aci_cni_deploy_container: noiro/cnideploy:5.1.1.0.1ae238a
  aci_host_container: noiro/aci-containers-host:5.1.1.0.1ae238a
  aci_opflex_container: noiro/opflex:5.1.1.0.1ae238a
  aci_mcast_container: noiro/opflex:5.1.1.0.1ae238a
  aci_ovs_container: noiro/openvswitch:5.1.1.0.1ae238a
  aci_controller_container: noiro/aci-containers-controller:5.1.1.0.1ae238a
  aci_gbp_server_container: noiro/gbp-server:5.1.1.0.1ae238a
  aci_opflex_server_container: noiro/opflex-server:5.1.1.0.1ae238a
ssh_key_path: ~/.ssh/id_rsa
ssh_cert_path: ""
ssh_agent_auth: false
authorization:
  mode: rbac # !DO_NOT_UPDATE
  options: {}
ignore_docker_version: null
kubernetes_version: "" # !DO_NOT_UPDATE
private_registries: []
ingress:
  provider: ""
  options: {}
  node_selector: {}
  extra_args: {}
  dns_policy: ""
  extra_envs: []
  extra_volumes: []
  extra_volume_mounts: []
  update_strategy: null
  http_port: 0
  https_port: 0
  network_mode: ""
  tolerations: [] # !DO_NOT_UPDATE
  default_backend: null
  default_http_backend_priority_class_name: ""
  nginx_ingress_controller_priority_class_name: ""
cluster_name: "" # !DO_NOT_UPDATE
cloud_provider:
  name: ""
prefix_path: ""
win_prefix_path: ""
addon_job_timeout: 30
bastion_host:
  address: ""
  port: ""
  user: ""
  ssh_key: ""
  ssh_key_path: ""
  ssh_cert: ""
  ssh_cert_path: ""
monitoring:
  provider: ""
  options: {}
  node_selector: {}
  update_strategy: null
  replicas: null
  tolerations: [] # !DO_NOT_UPDATE
  metrics_server_priority_class_name: ""
restore:
  restore: false
  snapshot_name: ""
rotate_encryption_key: false
dns: null
Here are the errors I've seen. On the first run:
FATA[0096] Failed to get job complete status for job rke-network-plugin-deploy-job in namespace kube-system
On the second run:
ERRO[0091] Host node1 failed to report Ready status with error: host node1 not ready
INFO[0091] [controlplane] Processing controlplane hosts for upgrade 1 at a time
INFO[0091] Processing controlplane host node1
INFO[0091] [controlplane] Now checking status of node node1, try #1
INFO[0096] [controlplane] Now checking status of node node1, try #2
INFO[0101] [controlplane] Now checking status of node node1, try #3
INFO[0106] [controlplane] Now checking status of node node1, try #4
INFO[0111] [controlplane] Now checking status of node node1, try #5
ERRO[0116] Failed to upgrade hosts: node1 with error [host node1 not ready]
FATA[0116] [controlPlane] Failed to upgrade Control Plane: [[host node1 not ready]]
Please advise, and thank you in advance. 🙏
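(A minimal sketch of the checks that usually narrow this down, assuming rke up got far enough to write kube_config_cluster.yml next to cluster.yml:)
export KUBECONFIG=$PWD/kube_config_cluster.yml
kubectl get nodes -o wide                                            # is node1 Ready or NotReady, and why?
kubectl -n kube-system get pods -o wide                              # are the Calico/CNI pods running on node1?
kubectl -n kube-system describe job rke-network-plugin-deploy-job    # why the network plugin addon job never completed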

agreeable-oil-87482

07/30/2022, 7:49 AM
SSH to 192.168.50.101 and check the state/logs for the CNI containers
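A minimal sketch of that check, assuming the node runs Docker and RKE's usual container names (the Calico container ID has to be looked up first):
ssh root@192.168.50.101
docker ps -a | grep -i calico            # find the calico/CNI containers and their state
docker logs --tail 100 <calico-container-id>
docker logs --tail 100 kubelet           # RKE runs the kubelet as a container named "kubelet"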

calm-egg-14566

07/31/2022, 7:44 AM
And here is what I get when I check the pod details with
kubectl describe
Name:         nginx-ingress-controller-7278k
Namespace:    ingress-nginx
Priority:     0
Node:         node1/10.0.2.15
Start Time:   Sun, 31 Jul 2022 14:42:26 +0700
Labels:       app=ingress-nginx
              app.kubernetes.io/component=controller
              app.kubernetes.io/instance=ingress-nginx
              app.kubernetes.io/name=ingress-nginx
              controller-revision-hash=6d48c5f9b4
              pod-template-generation=1
Annotations:  cni.projectcalico.org/podIP: 10.42.166.135/32
              cni.projectcalico.org/podIPs: 10.42.166.135/32
Status:       Running
IP:           10.42.166.135
IPs:
  IP:           10.42.166.135
Controlled By:  DaemonSet/nginx-ingress-controller
Containers:
  controller:
    Container ID:  docker://c5ce66b6635f8a3190605f5ffe2b88e31115f69611423ef7d28bc80c8b286213
    Image:         rancher/nginx-ingress-controller:nginx-0.43.0-rancher1
    Image ID:      docker-pullable://rancher/nginx-ingress-controller@sha256:677fb1a51ceb290f503fd44e6e27cf020813cf36d6822eea0ec8edff73002595
    Ports:         80/TCP, 443/TCP, 8443/TCP
    Host Ports:    80/TCP, 443/TCP, 0/TCP
    Args:
      /nginx-ingress-controller
      --election-id=ingress-controller-leader-nginx
      --controller-class=k8s.io/ingress-nginx
      --configmap=$(POD_NAMESPACE)/ingress-nginx-controller
      --tcp-services-configmap=$(POD_NAMESPACE)/tcp-services
      --udp-services-configmap=$(POD_NAMESPACE)/udp-services
      --validating-webhook=:8443
      --validating-webhook-certificate=/usr/local/certificates/cert
      --validating-webhook-key=/usr/local/certificates/key
    State:          Waiting
      Reason:       CrashLoopBackOff
    Last State:     Terminated
      Reason:       Error
      Exit Code:    2
      Started:      Sun, 31 Jul 2022 14:42:50 +0700
      Finished:     Sun, 31 Jul 2022 14:42:50 +0700
    Ready:          False
    Restart Count:  2
    Liveness:       http-get http://:10254/healthz delay=10s timeout=1s period=10s #success=1 #failure=5
    Readiness:      http-get http://:10254/healthz delay=10s timeout=1s period=10s #success=1 #failure=3
    Environment:
      POD_NAME:       nginx-ingress-controller-7278k (v1:metadata.name)
      POD_NAMESPACE:  ingress-nginx (v1:metadata.namespace)
      LD_PRELOAD:     /usr/local/lib/libmimalloc.so
    Mounts:
      /usr/local/certificates/ from webhook-cert (ro)
      /var/run/secrets/kubernetes.io/serviceaccount from ingress-nginx-token-hx5zj (ro)
Conditions:
  Type              Status
  Initialized       True
  Ready             False
  ContainersReady   False
  PodScheduled      True
Volumes:
  webhook-cert:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ingress-nginx-admission
    Optional:    false
  ingress-nginx-token-hx5zj:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ingress-nginx-token-hx5zj
    Optional:    false
QoS Class:       BestEffort
Node-Selectors:  <none>
Tolerations:     :NoExecute op=Exists
                 :NoSchedule op=Exists
                 node.kubernetes.io/disk-pressure:NoSchedule op=Exists
                 node.kubernetes.io/memory-pressure:NoSchedule op=Exists
                 node.kubernetes.io/not-ready:NoExecute op=Exists
                 node.kubernetes.io/pid-pressure:NoSchedule op=Exists
                 node.kubernetes.io/unreachable:NoExecute op=Exists
                 node.kubernetes.io/unschedulable:NoSchedule op=Exists
Events:
  Type     Reason       Age                From               Message
  ----     ------       ----               ----               -------
  Normal   Scheduled    42s                default-scheduler  Successfully assigned ingress-nginx/nginx-ingress-controller-7278k to node1
  Warning  FailedMount  41s (x3 over 43s)  kubelet            MountVolume.SetUp failed for volume "webhook-cert" : secret "ingress-nginx-admission" not found
  Normal   Pulled       19s (x3 over 37s)  kubelet            Container image "rancher/nginx-ingress-controller:nginx-0.43.0-rancher1" already present on machine
  Normal   Created      19s (x3 over 37s)  kubelet            Created container controller
  Normal   Started      19s (x3 over 36s)  kubelet            Started container controller
  Warning  BackOff      12s (x5 over 35s)  kubelet            Back-off restarting failed container
Any idea?
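(For this pod, a minimal sketch of follow-up checks, assuming the pod name above; the FailedMount warning suggests the admission webhook job and its ingress-nginx-admission secret are also worth checking:)
kubectl -n ingress-nginx logs nginx-ingress-controller-7278k --previous    # why the controller exits with code 2
kubectl -n ingress-nginx get jobs,secrets                                  # was the ingress-nginx-admission secret ever created?
kubectl -n ingress-nginx get events --sort-by=.lastTimestamp | tail -n 20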
Here is the output when I ran
kubectl describe
on the
metrics-server
pod:
Name:                 metrics-server-569c9689b5-4m6hl
Namespace:            kube-system
Priority:             2000000000
Priority Class Name:  system-cluster-critical
Node:                 node1/10.0.2.15
Start Time:           Sun, 31 Jul 2022 16:03:05 +0700
Labels:               k8s-app=metrics-server
                      pod-template-hash=569c9689b5
Annotations:          cni.projectcalico.org/podIP: 10.42.166.140/32
                      cni.projectcalico.org/podIPs: 10.42.166.140/32
Status:               Running
IP:                   10.42.166.140
IPs:
  IP:           10.42.166.140
Controlled By:  ReplicaSet/metrics-server-569c9689b5
Containers:
  metrics-server:
    Container ID:  docker://287df6f87e77758fd02235e1dee757fcec1f9a1fa0756b9015a49ed3ce9a9b36
    Image:         rancher/metrics-server:v0.4.1
    Image ID:      docker-pullable://rancher/metrics-server@sha256:b99989f8b6a18a838737a155e0b7fd0fa237e239034a6bc9b6330879ad001aa1
    Port:          4443/TCP
    Host Port:     0/TCP
    Args:
      --cert-dir=/tmp
      --secure-port=4443
      --kubelet-insecure-tls
      --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
      --metric-resolution=15s
    State:          Waiting
      Reason:       CrashLoopBackOff
    Last State:     Terminated
      Reason:       Completed
      Exit Code:    0
      Started:      Sun, 31 Jul 2022 16:11:16 +0700
      Finished:     Sun, 31 Jul 2022 16:11:42 +0700
    Ready:          False
    Restart Count:  7
    Requests:
      cpu:        100m
      memory:     200Mi
    Liveness:     http-get https://:https/livez delay=0s timeout=1s period=10s #success=1 #failure=3
    Readiness:    http-get https://:https/readyz delay=20s timeout=1s period=10s #success=1 #failure=3
    Environment:  <none>
    Mounts:
      /tmp from tmp-dir (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from metrics-server-token-hk7lq (ro)
Conditions:
  Type              Status
  Initialized       True
  Ready             False
  ContainersReady   False
  PodScheduled      True
Volumes:
  tmp-dir:
    Type:       EmptyDir (a temporary directory that shares a pod's lifetime)
    Medium:
    SizeLimit:  <unset>
  metrics-server-token-hk7lq:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  metrics-server-token-hk7lq
    Optional:    false
QoS Class:       Burstable
Node-Selectors:  <none>
Tolerations:     :NoExecute op=Exists
                 :NoSchedule op=Exists
Events:
  Type     Reason     Age                    From               Message
  ----     ------     ----                   ----               -------
  Normal   Scheduled  9m36s                  default-scheduler  Successfully assigned kube-system/metrics-server-569c9689b5-4m6hl to node1
  Normal   Created    8m40s (x3 over 9m35s)  kubelet            Created container metrics-server
  Normal   Started    8m38s (x3 over 9m35s)  kubelet            Started container metrics-server
  Warning  Unhealthy  8m11s (x3 over 9m11s)  kubelet            Readiness probe failed: HTTP probe failed with statuscode: 500
  Warning  Unhealthy  8m10s (x9 over 9m30s)  kubelet            Liveness probe failed: HTTP probe failed with statuscode: 500
  Normal   Killing    8m10s (x3 over 9m10s)  kubelet            Container metrics-server failed liveness probe, will be restarted
  Normal   Pulled     4m32s (x7 over 9m35s)  kubelet            Container image "rancher/metrics-server:v0.4.1" already present on machine
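(A minimal sketch of what might explain the 500s on the /livez and /readyz probes, assuming the pod name above:)
kubectl -n kube-system logs metrics-server-569c9689b5-4m6hl --previous | tail -n 30    # errors from the last restart
kubectl get apiservice v1beta1.metrics.k8s.io                                          # is the metrics API registered and Available?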

agreeable-oil-87482

07/31/2022, 9:25 AM
What do the Calico node pod logs show?

calm-egg-14566

07/31/2022, 9:35 AM
Name:                 calico-kube-controllers-7474cdd5f4-jlq4n
Namespace:            kube-system
Priority:             2000000000
Priority Class Name:  system-cluster-critical
Node:                 node1/10.0.2.15
Start Time:           Sun, 31 Jul 2022 16:03:34 +0700
Labels:               k8s-app=calico-kube-controllers
                      pod-template-hash=7474cdd5f4
Annotations:          cni.projectcalico.org/podIP: 10.42.166.141/32
                      cni.projectcalico.org/podIPs: 10.42.166.141/32
                      scheduler.alpha.kubernetes.io/critical-pod:
Status:               Running
IP:                   10.42.166.141
IPs:
  IP:           10.42.166.141
Controlled By:  ReplicaSet/calico-kube-controllers-7474cdd5f4
Containers:
  calico-kube-controllers:
    Container ID:   docker://4bf1a938446d6c9c57611fc509a2980d6e12c6c2d4fe3756729122affe49c8ba
    Image:          rancher/calico-kube-controllers:v3.17.2
    Image ID:       docker-pullable://rancher/calico-kube-controllers@sha256:c1f71f8c9080ce0b2d9548702e5e8c9c131767b838c54de0e6c5ae5cdd5d493a
    Port:           <none>
    Host Port:      <none>
    State:          Waiting
      Reason:       CrashLoopBackOff
    Last State:     Terminated
      Reason:       Error
      Exit Code:    2
      Started:      Sun, 31 Jul 2022 16:14:55 +0700
      Finished:     Sun, 31 Jul 2022 16:16:02 +0700
    Ready:          False
    Restart Count:  7
    Liveness:       exec [/usr/bin/check-status -l] delay=10s timeout=10s period=10s #success=1 #failure=6
    Readiness:      exec [/usr/bin/check-status -r] delay=0s timeout=1s period=10s #success=1 #failure=3
    Environment:
      ENABLED_CONTROLLERS:  node
      DATASTORE_TYPE:       kubernetes
    Mounts:
      /var/run/secrets/kubernetes.io/serviceaccount from calico-kube-controllers-token-q7qnd (ro)
Conditions:
  Type              Status
  Initialized       True
  Ready             False
  ContainersReady   False
  PodScheduled      True
Volumes:
  calico-kube-controllers-token-q7qnd:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  calico-kube-controllers-token-q7qnd
    Optional:    false
QoS Class:       BestEffort
Node-Selectors:  kubernetes.io/os=linux
Tolerations:     :NoSchedule op=Exists
                 :NoExecute op=Exists
                 CriticalAddonsOnly op=Exists
Events:
  Type     Reason     Age                 From               Message
  ----     ------     ----                ----               -------
  Normal   Scheduled  12m                 default-scheduler  Successfully assigned kube-system/calico-kube-controllers-7474cdd5f4-jlq4n to node1
  Normal   Pulled     10m (x3 over 12m)   kubelet            Container image "rancher/calico-kube-controllers:v3.17.2" already present on machine
  Normal   Created    10m (x3 over 12m)   kubelet            Created container calico-kube-controllers
  Normal   Started    10m (x3 over 12m)   kubelet            Started container calico-kube-controllers
  Warning  Unhealthy  10m (x14 over 12m)  kubelet            Liveness probe failed: unknown shorthand flag: 'l' in -l
unknown shorthand flag: 'l' in -l
Usage of check-status:
  -f, --file string   File to read with status information (default "/status/status.json")
  -r, --ready         Check readiness
  -v, --version       Display version
  Normal   Killing  7m45s (x5 over 11m)     kubelet  Container calico-kube-controllers failed liveness probe, will be restarted
  Warning  BackOff  2m40s (x17 over 6m45s)  kubelet  Back-off restarting failed container
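(The "unknown shorthand flag: 'l'" message suggests the liveness probe in the manifest does not match the check-status binary shipped in this image, which only advertises -f, -r and -v. A minimal sketch of how to confirm, plus a hypothetical workaround that switches the probe to -r; RKE may re-apply its own addon manifest afterwards:)
kubectl -n kube-system get deploy calico-kube-controllers \
  -o jsonpath='{.spec.template.spec.containers[0].livenessProbe}{"\n"}'
# hypothetical workaround only, not the upstream fix
kubectl -n kube-system patch deploy calico-kube-controllers --type=json \
  -p='[{"op":"replace","path":"/spec/template/spec/containers/0/livenessProbe/exec/command","value":["/usr/bin/check-status","-r"]}]'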

agreeable-oil-87482

07/31/2022, 9:40 AM
kubectl logs
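For example (the controller pod name is from the describe output above; selecting the calico-node pods by label assumes RKE's default k8s-app=calico-node label):
kubectl -n kube-system logs calico-kube-controllers-7474cdd5f4-jlq4n --previous
kubectl -n kube-system logs -l k8s-app=calico-node -c calico-node --tail=50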

calm-egg-14566

07/31/2022, 9:41 AM
2022-07-31 09:36:42.774 [INFO][1] main.go 88: Loaded configuration from environment config=&config.Config{LogLevel:"info", WorkloadEndpointWorkers:1, ProfileWorkers:1, PolicyWorkers:1, NodeWorkers:1, Kubeconfig:"", DatastoreType:"kubernetes"}
W0731 09:36:42.778258       1 client_config.go:543] Neither --kubeconfig nor --master was specified.  Using the inClusterConfig.  This might not work.
2022-07-31 09:36:42.781 [INFO][1] main.go 109: Ensuring Calico datastore is initialized
2022-07-31 09:36:42.806 [INFO][1] main.go 149: Getting initial config snapshot from datastore
2022-07-31 09:36:42.830 [INFO][1] main.go 152: Got initial config snapshot
2022-07-31 09:36:42.830 [INFO][1] watchersyncer.go 89: Start called
2022-07-31 09:36:42.830 [INFO][1] main.go 169: Starting status report routine
2022-07-31 09:36:42.830 [INFO][1] main.go 402: Starting controller ControllerType="Node"
2022-07-31 09:36:42.830 [INFO][1] node_controller.go 138: Starting Node controller
2022-07-31 09:36:42.832 [INFO][1] watchersyncer.go 127: Sending status update Status=wait-for-ready
2022-07-31 09:36:42.832 [INFO][1] node_syncer.go 40: Node controller syncer status updated: wait-for-ready
2022-07-31 09:36:42.832 [INFO][1] watchersyncer.go 147: Starting main event processing loop
2022-07-31 09:36:42.833 [INFO][1] watchercache.go 174: Full resync is required ListRoot="/calico/resources/v3/projectcalico.org/nodes"
2022-07-31 09:36:42.836 [INFO][1] resources.go 349: Main client watcher loop
2022-07-31 09:36:42.845 [INFO][1] watchercache.go 271: Sending synced update ListRoot="/calico/resources/v3/projectcalico.org/nodes"
2022-07-31 09:36:42.845 [INFO][1] watchersyncer.go 127: Sending status update Status=resync
2022-07-31 09:36:42.845 [INFO][1] node_syncer.go 40: Node controller syncer status updated: resync
2022-07-31 09:36:42.845 [INFO][1] watchersyncer.go 209: Received InSync event from one of the watcher caches
2022-07-31 09:36:42.845 [INFO][1] watchersyncer.go 221: All watchers have sync'd data - sending data and final sync
2022-07-31 09:36:42.845 [INFO][1] watchersyncer.go 127: Sending status update Status=in-sync
2022-07-31 09:36:42.845 [INFO][1] node_syncer.go 40: Node controller syncer status updated: in-sync
2022-07-31 09:36:42.862 [INFO][1] hostendpoints.go 90: successfully synced all hostendpoints
2022-07-31 09:36:42.931 [INFO][1] node_controller.go 151: Node controller is now running
2022-07-31 09:36:42.931 [INFO][1] ipam.go 45: Synchronizing IPAM data
2022-07-31 09:36:42.976 [INFO][1] ipam.go 191: Node and IPAM data is in sync
Here are the log messages when I ran kubectl logs on the metrics-server pod:
I0731 11:24:17.188133       1 requestheader_controller.go:169] Starting RequestHeaderAuthRequestController
I0731 11:24:17.189504       1 shared_informer.go:240] Waiting for caches to sync for RequestHeaderAuthRequestController
I0731 11:24:17.189658       1 configmap_cafile_content.go:202] Starting client-ca::kube-system::extension-apiserver-authentication::client-ca-file
I0731 11:24:17.189693       1 shared_informer.go:240] Waiting for caches to sync for client-ca::kube-system::extension-apiserver-authentication::client-ca-file
I0731 11:24:17.189850       1 configmap_cafile_content.go:202] Starting client-ca::kube-system::extension-apiserver-authentication::requestheader-client-ca-file
I0731 11:24:17.189923       1 shared_informer.go:240] Waiting for caches to sync for client-ca::kube-system::extension-apiserver-authentication::requestheader-client-ca-file
I0731 11:24:17.196957       1 dynamic_serving_content.go:130] Starting serving-cert::/tmp/apiserver.crt::/tmp/apiserver.key
I0731 11:24:17.204439       1 secure_serving.go:197] Serving securely on [::]:4443
I0731 11:24:17.204568       1 tlsconfig.go:240] Starting DynamicServingCertificateController
E0731 11:24:17.215731       1 server.go:132] unable to fully scrape metrics: unable to fully scrape metrics from node node1: unable to fetch metrics from node node1: request failed - "403 Forbidden".
I0731 11:24:17.290124       1 shared_informer.go:247] Caches are synced for client-ca::kube-system::extension-apiserver-authentication::client-ca-file
I0731 11:24:17.290164       1 shared_informer.go:247] Caches are synced for client-ca::kube-system::extension-apiserver-authentication::requestheader-client-ca-file
I0731 11:24:17.290255       1 shared_informer.go:247] Caches are synced for RequestHeaderAuthRequestController
E0731 11:24:32.101348       1 server.go:132] unable to fully scrape metrics: unable to fully scrape metrics from node node1: unable to fetch metrics from node node1: request failed - "403 Forbidden".
I0731 11:24:45.379338       1 requestheader_controller.go:183] Shutting down RequestHeaderAuthRequestController
I0731 11:24:45.379393       1 dynamic_serving_content.go:145] Shutting down serving-cert::/tmp/apiserver.crt::/tmp/apiserver.key
I0731 11:24:45.379416       1 configmap_cafile_content.go:223] Shutting down client-ca::kube-system::extension-apiserver-authentication::requestheader-client-ca-file
I0731 11:24:45.379458       1 configmap_cafile_content.go:223] Shutting down client-ca::kube-system::extension-apiserver-authentication::client-ca-file
I0731 11:24:45.379457       1 tlsconfig.go:255] Shutting down DynamicServingCertificateController
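(The repeated "403 Forbidden" from node1 points at kubelet authorization rather than networking. A minimal sketch of checks, assuming RKE's usual metrics-server service-account name and its "kubelet" container name:)
kubectl auth can-i get nodes/stats   --as=system:serviceaccount:kube-system:metrics-server
kubectl auth can-i get nodes/metrics --as=system:serviceaccount:kube-system:metrics-server
# on node1: how does the kubelet authenticate/authorize API requests?
docker inspect kubelet --format '{{json .Args}}' | tr ',' '\n' | grep -iE 'authorization|authentication|client-ca'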

bulky-animal-18539

10/10/2022, 11:08 PM
@calm-egg-14566 How did you solve the failing readiness and liveness probe issues for the nginx-ingress-controller pods?