diff --git a/config/config-dev.yaml b/config/config-dev.yaml index 431c1f92b..4b7c67fdd 100644 --- a/config/config-dev.yaml +++ b/config/config-dev.yaml @@ -85,6 +85,18 @@ clickhouse: # Default host_regexp to limit network connectivity from outside hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$" + ################################################ + ## + ## Configuration remote_servers section + ## + ################################################ + remoteServers: + # Split remote_servers XML into multiple ConfigMaps when payload exceeds this threshold. + # 600 KiB payload plus XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + # Maximum number of remote_servers fragments allowed for a CHI reconcile cycle. + maxRemoteServersFragments: 100 + ################################################ ## ## Configuration restart policy section diff --git a/config/config.yaml b/config/config.yaml index f7f6c1024..036337278 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -83,6 +83,18 @@ clickhouse: # Default host_regexp to limit network connectivity from outside hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$" + ################################################ + ## + ## Configuration remote_servers section + ## + ################################################ + remoteServers: + # Split remote_servers XML into multiple ConfigMaps when payload exceeds this threshold. + # 600 KiB payload plus XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + # Maximum number of remote_servers fragments allowed for a CHI reconcile cycle. + maxRemoteServersFragments: 100 + ################################################ ## ## Configuration restart policy section diff --git a/deploy/builder/templates-config/config.yaml b/deploy/builder/templates-config/config.yaml index 0226d778b..1f951be15 100644 --- a/deploy/builder/templates-config/config.yaml +++ b/deploy/builder/templates-config/config.yaml @@ -77,6 +77,18 @@ clickhouse: # Default host_regexp to limit network connectivity from outside hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$" + ################################################ + ## + ## Configuration remote_servers section + ## + ################################################ + remoteServers: + # Split remote_servers XML into multiple ConfigMaps when payload exceeds this threshold. + # 600 KiB payload plus XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + # Maximum number of remote_servers fragments allowed for a CHI reconcile cycle. + maxRemoteServersFragments: 100 + ################################################ ## ## Configuration restart policy section diff --git a/deploy/operator/clickhouse-operator-install-ansible.yaml b/deploy/operator/clickhouse-operator-install-ansible.yaml index b479269e8..732a72f42 100644 --- a/deploy/operator/clickhouse-operator-install-ansible.yaml +++ b/deploy/operator/clickhouse-operator-install-ansible.yaml @@ -4901,6 +4901,18 @@ data: # Default host_regexp to limit network connectivity from outside hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$" + ################################################ + ## + ## Configuration remote_servers section + ## + ################################################ + remoteServers: + # Split remote_servers XML into multiple ConfigMaps when payload exceeds this threshold. + # 600 KiB payload plus XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + # Maximum number of remote_servers fragments allowed for a CHI reconcile cycle. + maxRemoteServersFragments: 100 + ################################################ ## ## Configuration restart policy section diff --git a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml index 214f7c7cb..81bb377c0 100644 --- a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml @@ -5100,6 +5100,18 @@ data: # Default host_regexp to limit network connectivity from outside hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$" + ################################################ + ## + ## Configuration remote_servers section + ## + ################################################ + remoteServers: + # Split remote_servers XML into multiple ConfigMaps when payload exceeds this threshold. + # 600 KiB payload plus XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + # Maximum number of remote_servers fragments allowed for a CHI reconcile cycle. + maxRemoteServersFragments: 100 + ################################################ ## ## Configuration restart policy section diff --git a/deploy/operator/clickhouse-operator-install-bundle.yaml b/deploy/operator/clickhouse-operator-install-bundle.yaml index 7686937fa..7a26271e5 100644 --- a/deploy/operator/clickhouse-operator-install-bundle.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle.yaml @@ -5160,6 +5160,18 @@ data: # Default host_regexp to limit network connectivity from outside hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$" + ################################################ + ## + ## Configuration remote_servers section + ## + ################################################ + remoteServers: + # Split remote_servers XML into multiple ConfigMaps when payload exceeds this threshold. + # 600 KiB payload plus XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + # Maximum number of remote_servers fragments allowed for a CHI reconcile cycle. + maxRemoteServersFragments: 100 + ################################################ ## ## Configuration restart policy section diff --git a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml index 28d2f6895..2af71931d 100644 --- a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml @@ -4847,6 +4847,18 @@ data: # Default host_regexp to limit network connectivity from outside hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$" + ################################################ + ## + ## Configuration remote_servers section + ## + ################################################ + remoteServers: + # Split remote_servers XML into multiple ConfigMaps when payload exceeds this threshold. + # 600 KiB payload plus XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + # Maximum number of remote_servers fragments allowed for a CHI reconcile cycle. + maxRemoteServersFragments: 100 + ################################################ ## ## Configuration restart policy section diff --git a/deploy/operator/clickhouse-operator-install-template.yaml b/deploy/operator/clickhouse-operator-install-template.yaml index d6750b1d8..967d0921d 100644 --- a/deploy/operator/clickhouse-operator-install-template.yaml +++ b/deploy/operator/clickhouse-operator-install-template.yaml @@ -4894,6 +4894,18 @@ data: # Default host_regexp to limit network connectivity from outside hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$" + ################################################ + ## + ## Configuration remote_servers section + ## + ################################################ + remoteServers: + # Split remote_servers XML into multiple ConfigMaps when payload exceeds this threshold. + # 600 KiB payload plus XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + # Maximum number of remote_servers fragments allowed for a CHI reconcile cycle. + maxRemoteServersFragments: 100 + ################################################ ## ## Configuration restart policy section diff --git a/deploy/operator/clickhouse-operator-install-tf.yaml b/deploy/operator/clickhouse-operator-install-tf.yaml index 6481ffdae..0e9b17235 100644 --- a/deploy/operator/clickhouse-operator-install-tf.yaml +++ b/deploy/operator/clickhouse-operator-install-tf.yaml @@ -4901,6 +4901,18 @@ data: # Default host_regexp to limit network connectivity from outside hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$" + ################################################ + ## + ## Configuration remote_servers section + ## + ################################################ + remoteServers: + # Split remote_servers XML into multiple ConfigMaps when payload exceeds this threshold. + # 600 KiB payload plus XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + # Maximum number of remote_servers fragments allowed for a CHI reconcile cycle. + maxRemoteServersFragments: 100 + ################################################ ## ## Configuration restart policy section diff --git a/docs/operator_configuration.md b/docs/operator_configuration.md index c38efd0e5..41789a32d 100644 --- a/docs/operator_configuration.md +++ b/docs/operator_configuration.md @@ -9,11 +9,12 @@ Common configuration typically contains general ClickHouse configuration section 1. ClickHouse user configuration files - ready-to-use XML files with sections of ClickHouse configuration **as-is** User configuration typically contains ClickHouse configuration sections with user accounts specifications. Those are exposed via config maps as well. 1. `ClickHouseOperatorConfiguration` resource. -1. `ClickHouseInstallationTemplate`s. Operator provides functionality to specify parts of `ClickHouseInstallation` manifest as a set of templates, which would be used in all `ClickHouseInstallation`s. +1. `ClickHouseInstallationTemplate`s. Operator provides functionality to specify parts of `ClickHouseInstallation` manifest as a set of templates, which would be used in all `ClickHouseInstallation`s. ## Operator settings Operator settings are initialized in-order from 3 sources: + * `/etc/clickhouse-operator/config.yaml` * etc-clickhouse-operator-files configmap (also a part of default [clickhouse-operator-install-bundle.yaml][clickhouse-operator-install-bundle.yaml] * `ClickHouseOperatorConfiguration` resource. See [example][70-chop-config.yaml] for details. @@ -115,6 +116,34 @@ chPassword: clickhouse_operator_password chPort: 8123 ``` +## `remote_servers` fragmentation + +Large CHIs can generate a very large `remote_servers.xml`. The operator can split this into deterministic fragments to keep ConfigMaps within Kubernetes size limits. + +Fragmentation knobs are configured under `clickhouse.configuration.remoteServers`: + +```yaml +clickhouse: + configuration: + remoteServers: + # Default: 600 * 1024 + # 600 KiB payload + XML metadata/wrappers keeps ConfigMap data safely below 1 MiB. + remoteServersSplitThresholdBytes: 614400 + + # Default: 100 + maxRemoteServersFragments: 100 +``` + +Behavior: + +1. If generated `remote_servers` fits into one fragment under threshold, the legacy file `chop-generated-remote_servers.xml` remains in `chi--common-configd`. +1. If threshold is exceeded, operator creates per-cluster fragment ConfigMaps: `chi---remote-servers-shard-`. +1. Fragment file names are `chop-generated-remote_servers-part-.xml`. +1. StatefulSet pod template annotation `clickhouse.altinity.com/remote-servers-hash` is updated from canonical fragment metadata, so relevant pods roll out deterministically. +1. During legacy fallback, stale fragment ConfigMaps are removed. + +ClickHouse merges all files from `config.d`, so multiple remote-server fragment files are applied automatically. + ## ClickHouse Installation settings Operator deploys ClickHouse clusters with different defaults, that can be configured in a flexible way. @@ -122,6 +151,7 @@ Operator deploys ClickHouse clusters with different defaults, that can be config ### Default ClickHouse configuration files Default ClickHouse configuration files can be found in the following config maps, that are mounted to corresponding configuration folders of ClickHouse pods: + * etc-clickhouse-operator-confd-files * etc-clickhouse-operator-configd-files * etc-clickhouse-operator-usersd-files @@ -131,6 +161,7 @@ Config maps are initialized in default [clickhouse-operator-install-bundle.yaml] ### Defaults for ClickHouseInstallation Defaults for ClickHouseInstallation can be provided by `ClickHouseInstallationTemplate` it a variety of ways: + * etc-clickhouse-operator-templatesd-files configmap * `ClickHouseInstallationTemplate` resources. @@ -153,6 +184,7 @@ spec: ``` Template needs to be deployed to some namespace, and later on used in the installation: + ``` apiVersion: "clickhouse.altinity.com/v1" kind: "ClickHouseInstallation" diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_attributes.go b/pkg/apis/clickhouse.altinity.com/v1/type_attributes.go index c7643ee31..05ccbb36f 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/type_attributes.go +++ b/pkg/apis/clickhouse.altinity.com/v1/type_attributes.go @@ -18,10 +18,17 @@ import core "k8s.io/api/core/v1" // ComparableAttributes specifies CHI attributes that are comparable type ComparableAttributes struct { - additionalEnvVars []core.EnvVar `json:"-" yaml:"-"` - additionalVolumes []core.Volume `json:"-" yaml:"-"` - additionalVolumeMounts []core.VolumeMount `json:"-" yaml:"-"` - skipOwnerRef bool `json:"-" yaml:"-"` + additionalEnvVars []core.EnvVar `json:"-" yaml:"-"` + additionalVolumes []core.Volume `json:"-" yaml:"-"` + additionalVolumeMounts []core.VolumeMount `json:"-" yaml:"-"` + remoteServersMounts []RemoteServersMount `json:"-" yaml:"-"` + additionalPodTemplateAnnotations map[string]string `json:"-" yaml:"-"` + skipOwnerRef bool `json:"-" yaml:"-"` +} + +type RemoteServersMount struct { + ConfigMapName string + FileName string } func (a *ComparableAttributes) GetAdditionalEnvVars() []core.EnvVar { @@ -136,6 +143,44 @@ func (a *ComparableAttributes) GetSkipOwnerRef() bool { return a.skipOwnerRef } +func (a *ComparableAttributes) SetRemoteServersMounts(mounts []RemoteServersMount) { + if a == nil { + return + } + a.remoteServersMounts = mounts +} + +func (a *ComparableAttributes) GetRemoteServersMounts() []RemoteServersMount { + if a == nil { + return nil + } + return a.remoteServersMounts +} + +func (a *ComparableAttributes) SetAdditionalPodTemplateAnnotation(key, value string) { + if a == nil { + return + } + if a.additionalPodTemplateAnnotations == nil { + a.additionalPodTemplateAnnotations = make(map[string]string) + } + a.additionalPodTemplateAnnotations[key] = value +} + +func (a *ComparableAttributes) DeleteAdditionalPodTemplateAnnotation(key string) { + if a == nil || a.additionalPodTemplateAnnotations == nil { + return + } + delete(a.additionalPodTemplateAnnotations, key) +} + +func (a *ComparableAttributes) GetAdditionalPodTemplateAnnotations() map[string]string { + if a == nil { + return nil + } + return a.additionalPodTemplateAnnotations +} + func (a *ComparableAttributes) SetSkipOwnerRef(skip bool) { if a == nil { return diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop.go b/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop.go index d8117e739..361b6c23f 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop.go +++ b/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop.go @@ -81,6 +81,11 @@ const ( // defaultMetricsTablesRegexp specifies default regexp to match tables in system database to fetch metrics from defaultMetricsTablesRegexp = "^(metrics|custom_metrics)$" + // Default remote_servers fragmentation settings. + // 600KiB payload + XML metadata/wrappers keeps resulting ConfigMap data safely below 1MiB limit. + defaultRemoteServersSplitThresholdBytes = 600 * 1024 + defaultRemoteServersMaxFragments = 100 + // defaultReconcileCHIsThreadsNumber specifies default number of controller threads running concurrently. // Used in case no other specified in config defaultReconcileCHIsThreadsNumber = 1 @@ -225,11 +230,29 @@ type OperatorConfigConfig struct { User OperatorConfigUser `json:"user" yaml:"user"` + RemoteServers OperatorConfigRemoteServers `json:"remoteServers" yaml:"remoteServers"` + Network struct { HostRegexpTemplate string `json:"hostRegexpTemplate" yaml:"hostRegexpTemplate"` } `json:"network" yaml:"network"` } +// OperatorConfigRemoteServers specifies remote_servers fragmentation behavior. +type OperatorConfigRemoteServers struct { + RemoteServersSplitThresholdBytes int `json:"remoteServersSplitThresholdBytes" yaml:"remoteServersSplitThresholdBytes"` + MaxRemoteServersFragments int `json:"maxRemoteServersFragments" yaml:"maxRemoteServersFragments"` +} + +// SplitThresholdBytes returns configured remote_servers fragment payload threshold in bytes. +func (c OperatorConfigConfig) SplitThresholdBytes() int { + return c.RemoteServers.RemoteServersSplitThresholdBytes +} + +// MaxFragments returns configured maximum allowed number of remote_servers fragments. +func (c OperatorConfigConfig) MaxFragments() int { + return c.RemoteServers.MaxRemoteServersFragments +} + // OperatorConfigRestartPolicyRuleSet specifies set of rules type OperatorConfigRestartPolicyRuleSet map[types.Matchable]types.StringBool @@ -1079,6 +1102,15 @@ func (c *OperatorConfig) normalizeSectionClickHouseConfigurationUserDefault() { // chConfigNetworksHostRegexpTemplate } +func (c *OperatorConfig) normalizeSectionClickHouseConfigurationRemoteServers() { + if c.ClickHouse.Config.RemoteServers.RemoteServersSplitThresholdBytes <= 0 { + c.ClickHouse.Config.RemoteServers.RemoteServersSplitThresholdBytes = defaultRemoteServersSplitThresholdBytes + } + if c.ClickHouse.Config.RemoteServers.MaxRemoteServersFragments <= 0 { + c.ClickHouse.Config.RemoteServers.MaxRemoteServersFragments = defaultRemoteServersMaxFragments + } +} + func (c *OperatorConfig) normalizeSectionClickHouseAccess() { // Username and Password to be used by operator to connect to ClickHouse instances for // 1. Metrics requests @@ -1199,6 +1231,7 @@ func (c *OperatorConfig) normalize() { c.normalizeSectionClickHouseConfigurationFile() c.normalizeSectionClickHouseConfigurationUserDefault() + c.normalizeSectionClickHouseConfigurationRemoteServers() c.normalizeSectionClickHouseAccess() c.normalizeSectionClickHouseMetrics() c.normalizeSectionKeeperConfigurationFile() diff --git a/pkg/controller/chi/controller-deleter.go b/pkg/controller/chi/controller-deleter.go index 06ad5494d..fe87d153c 100644 --- a/pkg/controller/chi/controller-deleter.go +++ b/pkg/controller/chi/controller-deleter.go @@ -16,13 +16,16 @@ package chi import ( "context" + apps "k8s.io/api/apps/v1" core "k8s.io/api/core/v1" apiErrors "k8s.io/apimachinery/pkg/api/errors" meta "k8s.io/apimachinery/pkg/apis/meta/v1" log "github.com/altinity/clickhouse-operator/pkg/announcer" + clickhouseAltinityCom "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com" api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" + "github.com/altinity/clickhouse-operator/pkg/controller" "github.com/altinity/clickhouse-operator/pkg/controller/common/storage" "github.com/altinity/clickhouse-operator/pkg/interfaces" ) @@ -77,6 +80,29 @@ func (c *Controller) deleteConfigMapsCHI(ctx context.Context, chi *api.ClickHous log.V(1).M(chi).F().Error("FAIL delete ConfigMap %s/%s err:%v", chi.Namespace, configMapCommonUsersName, err) } + opts := controller.NewListOptions(map[string]string{ + clickhouseAltinityCom.APIGroupName + "/chi": chi.GetName(), + }) + configMaps, listErr := c.kube.ConfigMap().List(ctx, chi.GetNamespace(), opts) + if listErr == nil { + for i := range configMaps { + cm := &configMaps[i] + if _, ok := cm.GetLabels()[clickhouseAltinityCom.APIGroupName+"/remote-servers-shard"]; !ok { + continue + } + err = c.kube.ConfigMap().Delete(ctx, cm.GetNamespace(), cm.GetName()) + switch { + case err == nil: + log.V(1).M(chi).Info("OK delete ConfigMap %s/%s", cm.Namespace, cm.Name) + case apiErrors.IsNotFound(err): + log.V(1).M(chi).Info("NEUTRAL not found ConfigMap %s/%s", cm.Namespace, cm.Name) + err = nil + default: + log.V(1).M(chi).F().Error("FAIL delete ConfigMap %s/%s err:%v", cm.Namespace, cm.Name, err) + } + } + } + return err } diff --git a/pkg/controller/chi/worker-reconciler-chi.go b/pkg/controller/chi/worker-reconciler-chi.go index 22c9bcf8e..b30cb5675 100644 --- a/pkg/controller/chi/worker-reconciler-chi.go +++ b/pkg/controller/chi/worker-reconciler-chi.go @@ -251,6 +251,8 @@ func (w *worker) reconcileCRAuxObjectsPreliminary(ctx context.Context, cr *api.C cr.GetRuntime().LockCommonConfig() if err := w.reconcileConfigMapCommon(ctx, cr, w.options()); err != nil { w.a.F().Error("failed to reconcile config map common. err: %v", err) + cr.GetRuntime().UnlockCommonConfig() + return err } cr.GetRuntime().UnlockCommonConfig() @@ -315,6 +317,9 @@ func (w *worker) reconcileCRAuxObjectsFinal(ctx context.Context, cr *api.ClickHo cr.GetRuntime().LockCommonConfig() err = w.reconcileConfigMapCommon(ctx, cr) cr.GetRuntime().UnlockCommonConfig() + if err == nil { + err = w.deleteRemoteServersFragmentConfigMapsPostSTS(ctx, cr) + } w.includeAllHostsIntoCluster(ctx, cr) return err @@ -341,10 +346,34 @@ func (w *worker) reconcileConfigMapCommon( opts = options[0] } + chi, ok := cr.(*api.ClickHouseInstallation) + if !ok { + return nil + } + + // Fragment generation must be based on the full desired CR topology. + // Using reconcile-time exclusion selectors here may suppress fragments while hosts are still Requested. + fragments, createFragmentsErr := w.createRemoteServersFragments(chi, nil) + if createFragmentsErr != nil { + return createFragmentsErr + } + legacyMode := w.isLegacyRemoteServersMode(fragments) + w.applyRemoteServersRuntimeAttributes(chi, fragments, legacyMode) + + if !legacyMode { + if err := w.reconcileConfigMapRemoteServers(ctx, cr, fragments); err != nil { + return err + } + } + // ConfigMap common for all resources in CR // contains several sections, mapped as separated chopConfig files, // such as remote servers, zookeeper setup, etc configMapCommon := w.task.Creator().CreateConfigMap(interfaces.ConfigMapCommon, opts) + if !legacyMode { + delete(configMapCommon.Data, remoteServersLegacyFilename) + } + err := w.reconcileConfigMap(ctx, cr, configMapCommon) if err == nil { w.task.RegistryReconciled().RegisterConfigMap(configMapCommon.GetObjectMeta()) diff --git a/pkg/controller/chi/worker-remote-servers.go b/pkg/controller/chi/worker-remote-servers.go new file mode 100644 index 000000000..6fa370a88 --- /dev/null +++ b/pkg/controller/chi/worker-remote-servers.go @@ -0,0 +1,229 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chi + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "strconv" + "strings" + + apiErrors "k8s.io/apimachinery/pkg/api/errors" + + clickhouseAltinityCom "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com" + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" + "github.com/altinity/clickhouse-operator/pkg/chop" + "github.com/altinity/clickhouse-operator/pkg/controller" + "github.com/altinity/clickhouse-operator/pkg/interfaces" + "github.com/altinity/clickhouse-operator/pkg/model/chi/config" + chiCreator "github.com/altinity/clickhouse-operator/pkg/model/chi/creator" + "github.com/altinity/clickhouse-operator/pkg/model/managers" +) + +const ( + remoteServersLegacyFilename = "chop-generated-remote_servers.xml" + remoteServersHashAnnotation = clickhouseAltinityCom.APIGroupName + "/remote-servers-hash" +) + +func (w *worker) createRemoteServersFragments( + cr *api.ClickHouseInstallation, + opts *config.FilesGeneratorOptions, +) ([]interfaces.RemoteServersFragment, error) { + generator := managers.NewConfigFilesGenerator(managers.FilesGeneratorTypeClickHouse, cr, configGeneratorOptions(cr)) + fragments, err := generator.CreateRemoteServersFragments(opts) + if err != nil { + return nil, err + } + return fragments, nil +} + +func (w *worker) isLegacyRemoteServersMode(fragments []interfaces.RemoteServersFragment) bool { + if len(fragments) == 0 { + return true + } + if len(fragments) != 1 { + return false + } + return fragments[0].TotalBytes <= chop.Config().ClickHouse.Config.SplitThresholdBytes() +} + +func (w *worker) remoteServersHash(fragments []interfaces.RemoteServersFragment) string { + b := &strings.Builder{} + b.WriteString(strconv.Itoa(len(fragments))) + b.WriteString("\n") + for _, fragment := range fragments { + b.WriteString(fragment.Cluster) + b.WriteString(":") + b.WriteString(strconv.Itoa(fragment.ShardStart)) + b.WriteString(":") + b.WriteString(strconv.Itoa(fragment.ShardEnd)) + b.WriteString(":") + b.WriteString(strconv.Itoa(fragment.PayloadBytes)) + b.WriteString(":") + b.WriteString(fragment.XML) + b.WriteString("\n") + } + h := sha256.Sum256([]byte(b.String())) + return hex.EncodeToString(h[:]) +} + +func (w *worker) applyRemoteServersRuntimeAttributes( + cr *api.ClickHouseInstallation, + fragments []interfaces.RemoteServersFragment, + legacyMode bool, +) { + attributes := cr.GetRuntime().GetAttributes() + + mounts := make([]api.RemoteServersMount, 0) + if !legacyMode { + for _, fragment := range fragments { + mounts = append(mounts, api.RemoteServersMount{ + ConfigMapName: w.c.namer.Name(interfaces.NameConfigMapRemoteServers, cr, fragment.Cluster, fragment.ShardStart), + FileName: chiCreator.FragmentFilenameByClusterAndShardStart(fragment.Cluster, fragment.ShardStart), + }) + } + } + attributes.SetRemoteServersMounts(mounts) + attributes.SetAdditionalPodTemplateAnnotation(remoteServersHashAnnotation, w.remoteServersHash(fragments)) +} + +func (w *worker) desiredRemoteServersFragmentConfigMapNames( + cr api.ICustomResource, + fragments []interfaces.RemoteServersFragment, + legacyMode bool, +) map[string]struct{} { + desired := make(map[string]struct{}) + if legacyMode { + return desired + } + for _, fragment := range fragments { + name := w.c.namer.Name(interfaces.NameConfigMapRemoteServers, cr, fragment.Cluster, fragment.ShardStart) + desired[name] = struct{}{} + } + return desired +} + +func (w *worker) reconcileConfigMapRemoteServers( + ctx context.Context, + cr api.ICustomResource, + fragments []interfaces.RemoteServersFragment, +) error { + for _, fragment := range fragments { + configMap := w.task.Creator().CreateConfigMap(interfaces.ConfigMapRemoteServers, fragment) + err := w.reconcileConfigMap(ctx, cr, configMap) + if err != nil { + w.task.RegistryFailed().RegisterConfigMap(configMap.GetObjectMeta()) + return err + } + w.task.RegistryReconciled().RegisterConfigMap(configMap.GetObjectMeta()) + } + return nil +} + +func (w *worker) deleteStaleRemoteServersFragmentConfigMaps( + ctx context.Context, + cr api.ICustomResource, + desired map[string]struct{}, +) error { + opts := controller.NewListOptions(map[string]string{ + clickhouseAltinityCom.APIGroupName + "/chi": cr.GetName(), + }) + configMaps, err := w.c.kube.ConfigMap().List(ctx, cr.GetNamespace(), opts) + if err != nil { + return err + } + + for i := range configMaps { + configMap := &configMaps[i] + if _, ok := configMap.GetLabels()[clickhouseAltinityCom.APIGroupName+"/remote-servers-shard"]; !ok { + continue + } + if _, ok := desired[configMap.GetName()]; ok { + continue + } + if err := w.c.kube.ConfigMap().Delete(ctx, configMap.GetNamespace(), configMap.GetName()); err != nil && !apiErrors.IsNotFound(err) { + return fmt.Errorf("failed to delete remote_servers fragment configmap %s/%s: %w", configMap.GetNamespace(), configMap.GetName(), err) + } + } + + return nil +} + +func (w *worker) deleteRemoteServersFragmentConfigMapsPostSTS(ctx context.Context, cr *api.ClickHouseInstallation) error { + if w.task.RegistryFailed().NumStatefulSet() > 0 { + w.a.V(1).M(cr).F().Info("Skip remote_servers fragment GC because StatefulSet reconcile has failures") + return nil + } + + fragments, err := w.createRemoteServersFragments(cr, nil) + if err != nil { + return err + } + legacyMode := w.isLegacyRemoteServersMode(fragments) + desiredHash := w.remoteServersHash(fragments) + hashApplied, err := w.isRemoteServersHashAppliedToAllStatefulSets(ctx, cr, desiredHash) + if err != nil { + return err + } + if !hashApplied { + w.a.V(1).M(cr).F().Info("Skip remote_servers fragment GC because StatefulSet hash is not yet applied to all hosts") + return nil + } + desired := w.desiredRemoteServersFragmentConfigMapNames(cr, fragments, legacyMode) + + return w.deleteStaleRemoteServersFragmentConfigMaps(ctx, cr, desired) +} + +func (w *worker) isRemoteServersHashAppliedToAllStatefulSets( + ctx context.Context, + cr *api.ClickHouseInstallation, + hash string, +) (bool, error) { + applied := true + errs := cr.WalkHosts(func(host *api.Host) error { + sts, getErr := w.c.kube.STS().Get(ctx, host) + if apiErrors.IsNotFound(getErr) { + return nil + } + if getErr != nil { + return getErr + } + + if sts == nil { + applied = false + return nil + } + + annotations := sts.Spec.Template.GetAnnotations() + if annotations == nil { + applied = false + return nil + } + + if annotations[remoteServersHashAnnotation] != hash { + applied = false + } + + return nil + }) + + if len(errs) > 0 { + return false, errs[0] + } + + return applied, nil +} diff --git a/pkg/controller/chi/worker_remote_servers_test.go b/pkg/controller/chi/worker_remote_servers_test.go new file mode 100644 index 000000000..c1a0d3626 --- /dev/null +++ b/pkg/controller/chi/worker_remote_servers_test.go @@ -0,0 +1,68 @@ +package chi + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/altinity/clickhouse-operator/pkg/chop" + "github.com/altinity/clickhouse-operator/pkg/interfaces" +) + +func TestIsLegacyRemoteServersMode(t *testing.T) { + w := &worker{} + + cfg := chop.Config() + originalThreshold := cfg.ClickHouse.Config.RemoteServers.RemoteServersSplitThresholdBytes + cfg.ClickHouse.Config.RemoteServers.RemoteServersSplitThresholdBytes = 100 + t.Cleanup(func() { + cfg.ClickHouse.Config.RemoteServers.RemoteServersSplitThresholdBytes = originalThreshold + }) + + tests := []struct { + name string + fragments []interfaces.RemoteServersFragment + expected bool + }{ + { + name: "zero fragments fallback to legacy mode", + fragments: []interfaces.RemoteServersFragment{}, + expected: true, + }, + { + name: "one fragment below threshold is legacy mode", + fragments: []interfaces.RemoteServersFragment{ + {TotalBytes: 99}, + }, + expected: true, + }, + { + name: "one fragment at threshold is legacy mode", + fragments: []interfaces.RemoteServersFragment{ + {TotalBytes: 100}, + }, + expected: true, + }, + { + name: "one fragment above threshold is fragmented mode", + fragments: []interfaces.RemoteServersFragment{ + {TotalBytes: 101}, + }, + expected: false, + }, + { + name: "multiple fragments are always fragmented mode", + fragments: []interfaces.RemoteServersFragment{ + {TotalBytes: 50}, + {TotalBytes: 50}, + }, + expected: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require.Equal(t, test.expected, w.isLegacyRemoteServersMode(test.fragments)) + }) + } +} diff --git a/pkg/interfaces/config_map.go b/pkg/interfaces/config_map.go index e6000550e..0ee614664 100644 --- a/pkg/interfaces/config_map.go +++ b/pkg/interfaces/config_map.go @@ -17,8 +17,9 @@ package interfaces type ConfigMapType string const ( - ConfigMapCommon ConfigMapType = "common" - ConfigMapCommonUsers ConfigMapType = "common users" - ConfigMapHost ConfigMapType = "host" - ConfigMapConfig ConfigMapType = "config" + ConfigMapCommon ConfigMapType = "common" + ConfigMapCommonUsers ConfigMapType = "common users" + ConfigMapHost ConfigMapType = "host" + ConfigMapRemoteServers ConfigMapType = "remote servers" + ConfigMapConfig ConfigMapType = "config" ) diff --git a/pkg/interfaces/interfaces-main.go b/pkg/interfaces/interfaces-main.go index b99828d62..24efe2050 100644 --- a/pkg/interfaces/interfaces-main.go +++ b/pkg/interfaces/interfaces-main.go @@ -33,6 +33,7 @@ type IConfigMapManager interface { type IConfigFilesGenerator interface { CreateConfigFiles(what FilesGroupType, params ...any) map[string]string + CreateRemoteServersFragments(options any) ([]RemoteServersFragment, error) } type INameManager interface { diff --git a/pkg/interfaces/name_type.go b/pkg/interfaces/name_type.go index b98acba1d..9857e6ec5 100644 --- a/pkg/interfaces/name_type.go +++ b/pkg/interfaces/name_type.go @@ -17,9 +17,10 @@ package interfaces type NameType string const ( - NameConfigMapHost NameType = "ConfigMapHost" - NameConfigMapCommon NameType = "ConfigMapCommon" - NameConfigMapCommonUsers NameType = "NameConfigMapCommonUsers" + NameConfigMapHost NameType = "ConfigMapHost" + NameConfigMapCommon NameType = "ConfigMapCommon" + NameConfigMapCommonUsers NameType = "NameConfigMapCommonUsers" + NameConfigMapRemoteServers NameType = "NameConfigMapRemoteServers" ) const ( NameCRService NameType = "NameCRService" diff --git a/pkg/interfaces/remote_servers.go b/pkg/interfaces/remote_servers.go new file mode 100644 index 000000000..70583804a --- /dev/null +++ b/pkg/interfaces/remote_servers.go @@ -0,0 +1,26 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package interfaces + +// RemoteServersFragment represents a deterministic remote_servers XML fragment. +type RemoteServersFragment struct { + Cluster string + ShardStart int + ShardEnd int + Index int + XML string + PayloadBytes int + TotalBytes int +} diff --git a/pkg/model/chi/config/files_generator.go b/pkg/model/chi/config/files_generator.go index bc108a70a..bc8f17db1 100644 --- a/pkg/model/chi/config/files_generator.go +++ b/pkg/model/chi/config/files_generator.go @@ -15,7 +15,10 @@ package config import ( + "fmt" + chi "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" + "github.com/altinity/clickhouse-operator/pkg/chop" "github.com/altinity/clickhouse-operator/pkg/interfaces" "github.com/altinity/clickhouse-operator/pkg/util" ) @@ -74,6 +77,66 @@ func (c *FilesGenerator) CreateConfigFiles(what interfaces.FilesGroupType, param return nil } +func (c *FilesGenerator) CreateRemoteServersFragments(options any) ([]interfaces.RemoteServersFragment, error) { + var opts *FilesGeneratorOptions + if options != nil { + opts = options.(*FilesGeneratorOptions) + } else { + opts = defaultFilesGeneratorOptions() + } + + domain, ok := c.configFilesGeneratorDomain.(*FilesGeneratorDomain) + if !ok { + return []interfaces.RemoteServersFragment{}, fmt.Errorf("unexpected files generator domain type: %T", c.configFilesGeneratorDomain) + } + + thresholdBytes := chop.Config().ClickHouse.Config.SplitThresholdBytes() + maxFragments := chop.Config().ClickHouse.Config.MaxFragments() + if thresholdBytes <= 0 { + return nil, fmt.Errorf("invalid remote_servers split threshold: %d", thresholdBytes) + } + if maxFragments <= 0 { + return nil, fmt.Errorf("invalid remote_servers max fragments: %d", maxFragments) + } + + selector := opts.GetRemoteServersOptions() + legacyXML := domain.configGenerator.getRemoteServers(selector) + if legacyXML == "" { + return []interfaces.RemoteServersFragment{}, nil + } + + if len(legacyXML) <= thresholdBytes { + return []interfaces.RemoteServersFragment{{ + Cluster: "", + ShardStart: 0, + ShardEnd: 0, + Index: 0, + XML: legacyXML, + PayloadBytes: len(legacyXML), + TotalBytes: len(legacyXML), + }}, nil + } + + topology := domain.configGenerator.buildRemoteServersTopology(selector) + fragments := make([]interfaces.RemoteServersFragment, 0) + for _, clusterTopology := range topology { + clusterFragments, err := domain.configGenerator.streamRemoteServersFragments(clusterTopology, thresholdBytes, maxFragments) + if err != nil { + return nil, err + } + fragments = append(fragments, clusterFragments...) + if len(fragments) > maxFragments { + return nil, fmt.Errorf("remote_servers fragments limit exceeded: total=%d max=%d", len(fragments), maxFragments) + } + } + if len(fragments) == 0 { + return nil, fmt.Errorf("remote_servers split requested but no fragments were generated") + } + + sortRemoteServersFragments(fragments) + return fragments, nil +} + // createConfigFilesGroupCommon creates common config files func (c *FilesGenerator) createConfigFilesGroupCommon(options *FilesGeneratorOptions) map[string]string { if options == nil { diff --git a/pkg/model/chi/config/files_generator_test.go b/pkg/model/chi/config/files_generator_test.go new file mode 100644 index 000000000..58bc8e22a --- /dev/null +++ b/pkg/model/chi/config/files_generator_test.go @@ -0,0 +1,43 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +type fakeFilesGeneratorDomain struct{} + +func (fakeFilesGeneratorDomain) CreateConfigFilesGroupCommon(configSections map[string]string, options *FilesGeneratorOptions) { +} + +func (fakeFilesGeneratorDomain) CreateConfigFilesGroupUsers(configSections map[string]string) { +} + +func (fakeFilesGeneratorDomain) CreateConfigFilesGroupHost(configSections map[string]string, options *FilesGeneratorOptions) { +} + +func TestCreateRemoteServersFragments_UnexpectedDomainType(t *testing.T) { + generator := &FilesGenerator{ + configFilesGeneratorDomain: fakeFilesGeneratorDomain{}, + } + + fragments, err := generator.CreateRemoteServersFragments(nil) + require.Error(t, err) + require.Empty(t, fragments) + require.Contains(t, err.Error(), "unexpected files generator domain type") +} diff --git a/pkg/model/chi/config/remote_servers_fragment.go b/pkg/model/chi/config/remote_servers_fragment.go new file mode 100644 index 000000000..19fa082d5 --- /dev/null +++ b/pkg/model/chi/config/remote_servers_fragment.go @@ -0,0 +1,275 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "bytes" + "fmt" + "sort" + "strings" + + chi "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" + commonConfig "github.com/altinity/clickhouse-operator/pkg/model/common/config" +) + +type remoteServersShard struct { + Index int + InternalReplication string + Weight *int + ReplicasXML []string +} + +type remoteServersTopology struct { + ClusterName string + SecretXML string + Shards []remoteServersShard +} + +func (c *Generator) buildRemoteServersTopology(selector *commonConfig.HostSelector) []remoteServersTopology { + if selector == nil { + selector = defaultSelectorIncludeAll() + } + + result := make([]remoteServersTopology, 0) + + c.cr.WalkClusters(func(cluster chi.ICluster) error { + if c.clusterHostsNum(cluster, selector) < 1 { + return nil + } + + t := remoteServersTopology{ + ClusterName: cluster.GetName(), + SecretXML: c.renderClusterSecret(cluster.GetSecret()), + } + + cluster.WalkShards(func(index int, shard chi.IShard) error { + if c.shardHostsNum(shard, selector) < 1 { + return nil + } + + r := make([]string, 0) + shard.WalkHosts(func(host *chi.Host) error { + if !selector.Include(host) { + return nil + } + r = append(r, c.renderReplicaXML(host)) + return nil + }) + if len(r) == 0 { + return nil + } + + sh := remoteServersShard{ + Index: index, + InternalReplication: shard.GetInternalReplication().String(), + ReplicasXML: r, + } + if shard.HasWeight() { + w := shard.GetWeight() + sh.Weight = &w + } + t.Shards = append(t.Shards, sh) + return nil + }) + + if len(t.Shards) > 0 { + result = append(result, t) + } + return nil + }) + + result = append(result, c.buildAutoTopologies(selector)...) + return result +} + +func (c *Generator) buildAutoTopologies(selector *commonConfig.HostSelector) []remoteServersTopology { + if c.chiHostsNum(selector) < 1 { + return nil + } + + res := make([]remoteServersTopology, 0) + + allReplicated := remoteServersTopology{ClusterName: OneShardAllReplicasClusterName} + replicas := make([]string, 0) + c.cr.WalkHosts(func(host *chi.Host) error { + if selector.Include(host) { + replicas = append(replicas, c.renderReplicaXML(host)) + } + return nil + }) + if len(replicas) > 0 { + allReplicated.Shards = append(allReplicated.Shards, remoteServersShard{ + Index: 0, + InternalReplication: "true", + ReplicasXML: replicas, + }) + res = append(res, allReplicated) + } + + allSharded := remoteServersTopology{ClusterName: AllShardsOneReplicaClusterName} + if firstCluster := c.cr.FindCluster(0); firstCluster != nil { + allSharded.SecretXML = c.renderClusterSecret(firstCluster.GetSecret()) + } + index := 0 + c.cr.WalkHosts(func(host *chi.Host) error { + if selector.Include(host) { + allSharded.Shards = append(allSharded.Shards, remoteServersShard{ + Index: index, + InternalReplication: "false", + ReplicasXML: []string{c.renderReplicaXML(host)}, + }) + index++ + } + return nil + }) + if len(allSharded.Shards) > 0 { + res = append(res, allSharded) + } + + allClusters := remoteServersTopology{ClusterName: AllClustersClusterName} + index = 0 + c.cr.WalkClusters(func(cluster chi.ICluster) error { + cluster.WalkShards(func(_ int, shard chi.IShard) error { + if c.shardHostsNum(shard, selector) < 1 { + return nil + } + r := make([]string, 0) + shard.WalkHosts(func(host *chi.Host) error { + if selector.Include(host) { + r = append(r, c.renderReplicaXML(host)) + } + return nil + }) + if len(r) == 0 { + return nil + } + allClusters.Shards = append(allClusters.Shards, remoteServersShard{ + Index: index, + InternalReplication: shard.GetInternalReplication().String(), + ReplicasXML: r, + }) + index++ + return nil + }) + return nil + }) + if len(allClusters.Shards) > 0 { + res = append(res, allClusters) + } + + return res +} + +func (c *Generator) renderClusterSecret(secret *chi.ClusterSecret) string { + if secret == nil { + return "" + } + switch secret.Source() { + case chi.ClusterSecretSourcePlaintext: + return fmt.Sprintf(" %s\n", secret.Value) + case chi.ClusterSecretSourceSecretRef, chi.ClusterSecretSourceAuto: + return fmt.Sprintf(" \n", InternodeClusterSecretEnvName) + default: + return "" + } +} + +func (c *Generator) renderReplicaXML(host *chi.Host) string { + b := &bytes.Buffer{} + c.getRemoteServersReplica(host, b) + return b.String() +} + +func (c *Generator) renderShardXML(shard remoteServersShard) string { + b := &bytes.Buffer{} + b.WriteString(" \n") + b.WriteString(fmt.Sprintf(" %s\n", shard.InternalReplication)) + if shard.Weight != nil { + b.WriteString(fmt.Sprintf(" %d\n", *shard.Weight)) + } + for _, replicaXML := range shard.ReplicasXML { + b.WriteString(replicaXML) + } + b.WriteString(" \n") + return b.String() +} + +func (c *Generator) streamRemoteServersFragments(topology remoteServersTopology, thresholdBytes, maxFragments int) ([]RemoteServersFragment, error) { + if len(topology.Shards) == 0 { + return nil, nil + } + + header := fmt.Sprintf("<%s>\n \n <%s>\n", xmlTagYandex, topology.ClusterName) + if topology.SecretXML != "" { + header += topology.SecretXML + } + footer := fmt.Sprintf(" \n \n\n", topology.ClusterName, xmlTagYandex) + // Overhead includes static XML wrappers around the fragment payload. + overhead := len(header) + len(footer) + + fragments := make([]RemoteServersFragment, 0) + for i := 0; i < len(topology.Shards); { + payload := &strings.Builder{} + // payloadBytes counts dynamic shard/replica XML only (without wrappers). + payloadBytes := 0 + start := i + + for ; i < len(topology.Shards); i++ { + shardXML := c.renderShardXML(topology.Shards[i]) + candidatePayload := payloadBytes + len(shardXML) + candidateTotal := candidatePayload + overhead + if candidateTotal > thresholdBytes && payloadBytes > 0 { + break + } + if candidateTotal > thresholdBytes { + return nil, fmt.Errorf("remote_servers shard is too large: cluster=%s shard=%d size=%d threshold=%d", topology.ClusterName, topology.Shards[i].Index, candidateTotal, thresholdBytes) + } + payload.WriteString(shardXML) + payloadBytes = candidatePayload + } + + if payloadBytes == 0 { + return nil, fmt.Errorf("unable to build remote_servers fragment for cluster=%s", topology.ClusterName) + } + + end := i - 1 + xml := header + payload.String() + footer + fragments = append(fragments, RemoteServersFragment{ + Cluster: topology.ClusterName, + ShardStart: topology.Shards[start].Index, + ShardEnd: topology.Shards[end].Index, + Index: topology.Shards[start].Index, + XML: xml, + PayloadBytes: payloadBytes, + // TotalBytes includes payload + static wrappers/metadata. + TotalBytes: len(xml), + }) + + if len(fragments) > maxFragments { + return nil, fmt.Errorf("remote_servers fragments limit exceeded: cluster=%s max=%d", topology.ClusterName, maxFragments) + } + } + + return fragments, nil +} + +func sortRemoteServersFragments(fragments []RemoteServersFragment) { + sort.Slice(fragments, func(i, j int) bool { + if fragments[i].Cluster != fragments[j].Cluster { + return fragments[i].Cluster < fragments[j].Cluster + } + return fragments[i].ShardStart < fragments[j].ShardStart + }) +} diff --git a/pkg/model/chi/config/remote_servers_fragment_test.go b/pkg/model/chi/config/remote_servers_fragment_test.go new file mode 100644 index 000000000..f24dbe783 --- /dev/null +++ b/pkg/model/chi/config/remote_servers_fragment_test.go @@ -0,0 +1,78 @@ +package config + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func remoteServersWrapperBytes(clusterName, secretXML string) int { + header := fmt.Sprintf("<%s>\n \n <%s>\n", xmlTagYandex, clusterName) + if secretXML != "" { + header += secretXML + } + footer := fmt.Sprintf(" \n \n\n", clusterName, xmlTagYandex) + return len(header) + len(footer) +} + +func TestStreamRemoteServersFragments_PackingAndAccounting(t *testing.T) { + g := &Generator{} + topology := remoteServersTopology{ + ClusterName: "cluster-a", + SecretXML: " abc\n", + Shards: []remoteServersShard{ + { + Index: 0, + InternalReplication: "false", + ReplicasXML: []string{ + " h09000\n", + }, + }, + { + Index: 1, + InternalReplication: "false", + ReplicasXML: []string{ + " h19000\n", + }, + }, + { + Index: 2, + InternalReplication: "false", + ReplicasXML: []string{ + " h29000\n", + }, + }, + }, + } + + shard0Bytes := len(g.renderShardXML(topology.Shards[0])) + shard1Bytes := len(g.renderShardXML(topology.Shards[1])) + shard2Bytes := len(g.renderShardXML(topology.Shards[2])) + overhead := remoteServersWrapperBytes(topology.ClusterName, topology.SecretXML) + + threshold := overhead + shard0Bytes + shard1Bytes + fragments, err := g.streamRemoteServersFragments(topology, threshold, 10) + require.NoError(t, err) + require.Len(t, fragments, 2) + + first := fragments[0] + require.Equal(t, "cluster-a", first.Cluster) + require.Equal(t, 0, first.ShardStart) + require.Equal(t, 1, first.ShardEnd) + expectedPayloadFirst := shard0Bytes + shard1Bytes + require.Equal(t, expectedPayloadFirst, first.PayloadBytes) + require.Equal(t, expectedPayloadFirst+overhead, first.TotalBytes) + require.Equal(t, len(first.XML), first.TotalBytes) + require.LessOrEqual(t, first.TotalBytes, threshold) + + second := fragments[1] + require.Equal(t, "cluster-a", second.Cluster) + require.Equal(t, 2, second.ShardStart) + require.Equal(t, 2, second.ShardEnd) + expectedPayloadSecond := shard2Bytes + require.Equal(t, expectedPayloadSecond, second.PayloadBytes) + require.Equal(t, expectedPayloadSecond+overhead, second.TotalBytes) + require.Equal(t, len(second.XML), second.TotalBytes) + require.LessOrEqual(t, second.TotalBytes, threshold) +} diff --git a/pkg/model/chi/config/remote_servers_types.go b/pkg/model/chi/config/remote_servers_types.go new file mode 100644 index 000000000..e5d27e246 --- /dev/null +++ b/pkg/model/chi/config/remote_servers_types.go @@ -0,0 +1,20 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import "github.com/altinity/clickhouse-operator/pkg/interfaces" + +// RemoteServersFragment is an alias used by CHI configuration code. +type RemoteServersFragment = interfaces.RemoteServersFragment diff --git a/pkg/model/chi/creator/config_map.go b/pkg/model/chi/creator/config_map.go index cf634b640..25aa1630f 100644 --- a/pkg/model/chi/creator/config_map.go +++ b/pkg/model/chi/creator/config_map.go @@ -15,15 +15,20 @@ package creator import ( + "fmt" + core "k8s.io/api/core/v1" meta "k8s.io/apimachinery/pkg/apis/meta/v1" + clickhouseAltinityCom "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com" api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" "github.com/altinity/clickhouse-operator/pkg/interfaces" "github.com/altinity/clickhouse-operator/pkg/model/chi/config" "github.com/altinity/clickhouse-operator/pkg/model/chi/macro" "github.com/altinity/clickhouse-operator/pkg/model/chi/namer" "github.com/altinity/clickhouse-operator/pkg/model/chi/tags/labeler" + commonLabeler "github.com/altinity/clickhouse-operator/pkg/model/common/tags/labeler" + "github.com/altinity/clickhouse-operator/pkg/util" ) type ConfigMapManager struct { @@ -63,6 +68,11 @@ func (m *ConfigMapManager) CreateConfigMap(what interfaces.ConfigMapType, params options = config.NewFilesGeneratorOptions().SetHost(host) return m.createConfigMapHost(host, options) } + case interfaces.ConfigMapRemoteServers: + if len(params) > 0 { + fragment := params[0].(interfaces.RemoteServersFragment) + return m.createConfigMapRemoteServers(fragment) + } } panic("unknown config map type") } @@ -142,3 +152,44 @@ func (m *ConfigMapManager) createConfigMapHost(host *api.Host, options *config.F m.labeler.MakeObjectVersion(cm.GetObjectMeta(), cm) return cm } + +func remoteServersFilename(cluster string, shardStart int) string { + return fmt.Sprintf("chop-generated-remote_servers-part-%05d-%s.xml", shardStart, cluster) +} + +func FragmentFilenameByClusterAndShardStart(cluster string, shardStart int) string { + return remoteServersFilename(cluster, shardStart) +} + +func (m *ConfigMapManager) createConfigMapRemoteServers(fragment interfaces.RemoteServersFragment) *core.ConfigMap { + cmName := m.namer.Name(interfaces.NameConfigMapRemoteServers, m.cr, fragment.Cluster, fragment.ShardStart) + filename := remoteServersFilename(fragment.Cluster, fragment.ShardStart) + + labels := util.MergeStringMapsOverwrite( + m.macro.Scope(m.cr).Map(m.tagger.Label(interfaces.LabelConfigMapCommon)), + map[string]string{ + m.labeler.Get(commonLabeler.LabelClusterName): fragment.Cluster, + clickhouseAltinityCom.APIGroupName + "/remote-servers-shard": fmt.Sprintf("%05d", fragment.ShardStart), + }, + ) + + cm := &core.ConfigMap{ + TypeMeta: meta.TypeMeta{ + Kind: "ConfigMap", + APIVersion: "v1", + }, + ObjectMeta: meta.ObjectMeta{ + Name: cmName, + Namespace: m.cr.GetNamespace(), + Labels: labels, + Annotations: m.macro.Scope(m.cr).Map(m.tagger.Annotate(interfaces.AnnotateConfigMapCommon)), + OwnerReferences: m.or.CreateOwnerReferences(m.cr), + }, + Data: map[string]string{ + filename: fragment.XML, + }, + } + + m.labeler.MakeObjectVersion(cm.GetObjectMeta(), cm) + return cm +} diff --git a/pkg/model/chi/namer/const.go b/pkg/model/chi/namer/const.go index e2f30da7f..cfa32a8ee 100644 --- a/pkg/model/chi/namer/const.go +++ b/pkg/model/chi/namer/const.go @@ -24,6 +24,9 @@ const ( // patternConfigMapHostName is a template of macros ConfigMap. "chi-{chi}-deploy-confd-{cluster}-{shard}-{host}" patternConfigMapHostName = "chi- + macro.List.Get(macroCommon.MacrosCRName) + -deploy-confd- + macro.List.Get(macroCommon.MacrosClusterName) + - + macro.List.Get(macroCommon.MacrosHostName)" + // patternConfigMapRemoteServersName is a template of per-fragment remote_servers ConfigMap. + patternConfigMapRemoteServersName = "chi- + macro.List.Get(macroCommon.MacrosCRName) + - + macro.List.Get(macroCommon.MacrosClusterName) + -remote-servers-shard-%s" + // patternCRServiceName is a template of Custom Resource Service name. "clickhouse-{chi}" patternCRServiceName = "clickhouse- + macro.MacrosCRName" diff --git a/pkg/model/chi/namer/name.go b/pkg/model/chi/namer/name.go index fef8d1a06..7b092e774 100644 --- a/pkg/model/chi/namer/name.go +++ b/pkg/model/chi/namer/name.go @@ -39,6 +39,11 @@ func (n *Namer) createConfigMapNameHost(host *api.Host) string { return n.macro.Scope(host).Line(patterns.Get(patternConfigMapHostName)) } +// createConfigMapNameRemoteServers returns a name for a remote_servers fragment ConfigMap. +func (n *Namer) createConfigMapNameRemoteServers(cr api.ICustomResource, clusterName string, shardStart int) string { + return fmt.Sprintf("chi-%s-%s-remote-servers-shard-%05d", cr.GetName(), clusterName, shardStart) +} + // createCRServiceName creates a name of a root ClickHouseInstallation Service resource func (n *Namer) createCRServiceName(cr api.ICustomResource, templates ...*api.ServiceTemplate) string { // Name can be generated either from default name pattern, diff --git a/pkg/model/chi/namer/namer.go b/pkg/model/chi/namer/namer.go index 4b9cda877..3ccf30bad 100644 --- a/pkg/model/chi/namer/namer.go +++ b/pkg/model/chi/namer/namer.go @@ -47,6 +47,11 @@ func (n *Namer) Name(what interfaces.NameType, params ...any) string { case interfaces.NameConfigMapCommonUsers: cr := params[0].(api.ICustomResource) return n.createConfigMapNameCommonUsers(cr) + case interfaces.NameConfigMapRemoteServers: + cr := params[0].(api.ICustomResource) + clusterName := params[1].(string) + shardStart := params[2].(int) + return n.createConfigMapNameRemoteServers(cr, clusterName, shardStart) case interfaces.NameCRService: if len(params) > 1 { diff --git a/pkg/model/chi/namer/patterns.go b/pkg/model/chi/namer/patterns.go index c9a74d5a3..48fe0de2b 100644 --- a/pkg/model/chi/namer/patterns.go +++ b/pkg/model/chi/namer/patterns.go @@ -30,6 +30,9 @@ var patterns = types.List{ // patternConfigMapHostName is a template of macros ConfigMap. "chi-{chi}-deploy-confd-{cluster}-{shard}-{host}" patternConfigMapHostName: "chi-" + macrosList.Get().Get(macro.MacrosCRName) + "-deploy-confd-" + macrosList.Get().Get(macro.MacrosClusterName) + "-" + macrosList.Get().Get(macro.MacrosHostName), + // patternConfigMapRemoteServersName is a template of per-fragment remote_servers ConfigMap. + patternConfigMapRemoteServersName: "chi-" + macrosList.Get().Get(macro.MacrosCRName) + "-" + macrosList.Get().Get(macro.MacrosClusterName) + "-remote-servers-shard-%s", + // patternCRServiceName is a template of Custom Resource Service name. "clickhouse-{chi}" patternCRServiceName: "clickhouse-" + macrosList.Get().Get(macro.MacrosCRName), diff --git a/pkg/model/chi/volume/volume.go b/pkg/model/chi/volume/volume.go index 832372e27..45cffb2c8 100644 --- a/pkg/model/chi/volume/volume.go +++ b/pkg/model/chi/volume/volume.go @@ -16,6 +16,7 @@ package volume import ( apps "k8s.io/api/apps/v1" + core "k8s.io/api/core/v1" api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" "github.com/altinity/clickhouse-operator/pkg/interfaces" @@ -56,11 +57,19 @@ func (m *Manager) stsSetupVolumesForConfigMaps(statefulSet *apps.StatefulSet, ho configMapCommonName := m.namer.Name(interfaces.NameConfigMapCommon, m.cr) configMapCommonUsersName := m.namer.Name(interfaces.NameConfigMapCommonUsers, m.cr) configMapHostName := m.namer.Name(interfaces.NameConfigMapHost, host) + remoteServersMounts := host.GetCR().GetRuntime().GetAttributes().GetRemoteServersMounts() + + var configCommonVolume core.Volume + if len(remoteServersMounts) > 0 { + configCommonVolume = createProjectedConfigCommonVolume(configMapCommonName, remoteServersMounts) + } else { + configCommonVolume = k8s.CreateVolumeForConfigMap(configMapCommonName) + } // Add all ConfigMap objects as Volume objects of type ConfigMap k8s.StatefulSetAppendVolumes( statefulSet, - k8s.CreateVolumeForConfigMap(configMapCommonName), + configCommonVolume, k8s.CreateVolumeForConfigMap(configMapCommonUsersName), k8s.CreateVolumeForConfigMap(configMapHostName), ) @@ -75,6 +84,37 @@ func (m *Manager) stsSetupVolumesForConfigMaps(statefulSet *apps.StatefulSet, ho ) } +func createProjectedConfigCommonVolume(configMapCommonName string, remoteServersMounts []api.RemoteServersMount) core.Volume { + var defaultMode int32 = 0644 + + sources := []core.VolumeProjection{ + { + ConfigMap: &core.ConfigMapProjection{ + LocalObjectReference: core.LocalObjectReference{Name: configMapCommonName}, + }, + }, + } + + for _, mount := range remoteServersMounts { + sources = append(sources, core.VolumeProjection{ + ConfigMap: &core.ConfigMapProjection{ + LocalObjectReference: core.LocalObjectReference{Name: mount.ConfigMapName}, + Items: []core.KeyToPath{{Key: mount.FileName, Path: mount.FileName}}, + }, + }) + } + + return core.Volume{ + Name: configMapCommonName, + VolumeSource: core.VolumeSource{ + Projected: &core.ProjectedVolumeSource{ + Sources: sources, + DefaultMode: &defaultMode, + }, + }, + } +} + // stsSetupVolumesUserDataWithFixedPaths // appends VolumeMounts for Data and Log VolumeClaimTemplates on all containers. // Creates VolumeMounts for Data and Log volumes in case these volume templates are specified in `templates`. diff --git a/pkg/model/chk/config/files_generator.go b/pkg/model/chk/config/files_generator.go index bc108a70a..bcc9d0142 100644 --- a/pkg/model/chk/config/files_generator.go +++ b/pkg/model/chk/config/files_generator.go @@ -74,6 +74,10 @@ func (c *FilesGenerator) CreateConfigFiles(what interfaces.FilesGroupType, param return nil } +func (c *FilesGenerator) CreateRemoteServersFragments(options any) ([]interfaces.RemoteServersFragment, error) { + return []interfaces.RemoteServersFragment{}, nil +} + // createConfigFilesGroupCommon creates common config files func (c *FilesGenerator) createConfigFilesGroupCommon(options *FilesGeneratorOptions) map[string]string { if options == nil { @@ -93,6 +97,7 @@ func (c *FilesGenerator) createConfigFilesGroupCommonDomain(configSections map[s } func (c *FilesGenerator) createConfigFilesGroupCommonGeneric(configSections map[string]string, options *FilesGeneratorOptions) { + _ = options // common settings util.IncludeNonEmpty(configSections, createConfigSectionFilename(configSettings), c.configGeneratorGeneric.GetGlobalSettings()) // common files diff --git a/pkg/model/common/creator/stateful-set.go b/pkg/model/common/creator/stateful-set.go index b929c2a48..474b9b96b 100644 --- a/pkg/model/common/creator/stateful-set.go +++ b/pkg/model/common/creator/stateful-set.go @@ -99,6 +99,7 @@ func (c *Creator) createPodTemplateSpec(template *api.PodTemplate, host *api.Hos c.tagger.Annotate(interfaces.AnnotatePodTemplate, host), template.ObjectMeta.GetAnnotations(), )) + annotations = util.MergeStringMapsOverwrite(annotations, host.GetCR().GetRuntime().GetAttributes().GetAdditionalPodTemplateAnnotations()) return core.PodTemplateSpec{ ObjectMeta: meta.ObjectMeta{ diff --git a/tests/e2e/manifests/chi/test-062-remote-servers-fragmentation-1.yaml b/tests/e2e/manifests/chi/test-062-remote-servers-fragmentation-1.yaml new file mode 100644 index 000000000..c5c82328c --- /dev/null +++ b/tests/e2e/manifests/chi/test-062-remote-servers-fragmentation-1.yaml @@ -0,0 +1,13 @@ +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseInstallation" +metadata: + name: t062-rs-frag +spec: + useTemplates: + - name: clickhouse-version + configuration: + clusters: + - name: default + layout: + shardsCount: 4 + replicasCount: 1 diff --git a/tests/e2e/manifests/chi/test-062-remote-servers-fragmentation-2.yaml b/tests/e2e/manifests/chi/test-062-remote-servers-fragmentation-2.yaml new file mode 100644 index 000000000..da8c447fd --- /dev/null +++ b/tests/e2e/manifests/chi/test-062-remote-servers-fragmentation-2.yaml @@ -0,0 +1,13 @@ +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseInstallation" +metadata: + name: t062-rs-frag +spec: + useTemplates: + - name: clickhouse-version + configuration: + clusters: + - name: default + layout: + shardsCount: 1 + replicasCount: 1 diff --git a/tests/e2e/manifests/chi/test-063-remote-servers-max-fragments.yaml b/tests/e2e/manifests/chi/test-063-remote-servers-max-fragments.yaml new file mode 100644 index 000000000..1b2a35ea0 --- /dev/null +++ b/tests/e2e/manifests/chi/test-063-remote-servers-max-fragments.yaml @@ -0,0 +1,13 @@ +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseInstallation" +metadata: + name: t063-rs-maxfrag +spec: + useTemplates: + - name: clickhouse-version + configuration: + clusters: + - name: default + layout: + shardsCount: 4 + replicasCount: 1 diff --git a/tests/e2e/manifests/chopconf/test-062-remote-servers-fragmentation.yaml b/tests/e2e/manifests/chopconf/test-062-remote-servers-fragmentation.yaml new file mode 100644 index 000000000..2ccac9a01 --- /dev/null +++ b/tests/e2e/manifests/chopconf/test-062-remote-servers-fragmentation.yaml @@ -0,0 +1,10 @@ +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseOperatorConfiguration" +metadata: + name: "test-062-remote-servers-fragmentation" +spec: + clickhouse: + configuration: + remoteServers: + remoteServersSplitThresholdBytes: 500 + maxRemoteServersFragments: 32 diff --git a/tests/e2e/manifests/chopconf/test-063-remote-servers-max-fragments.yaml b/tests/e2e/manifests/chopconf/test-063-remote-servers-max-fragments.yaml new file mode 100644 index 000000000..ae7e3c016 --- /dev/null +++ b/tests/e2e/manifests/chopconf/test-063-remote-servers-max-fragments.yaml @@ -0,0 +1,10 @@ +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseOperatorConfiguration" +metadata: + name: "test-063-remote-servers-max-fragments" +spec: + clickhouse: + configuration: + remoteServers: + remoteServersSplitThresholdBytes: 500 + maxRemoteServersFragments: 1 diff --git a/tests/e2e/test_operator.py b/tests/e2e/test_operator.py index 5eb3e2d14..f3e0857d7 100644 --- a/tests/e2e/test_operator.py +++ b/tests/e2e/test_operator.py @@ -404,7 +404,7 @@ def get_replicas_from_remote_servers(chi, cluster, shell=None): if cluster == "": cluster = chi - remote_servers = kubectl.get("configmap", f"chi-{chi}-common-configd", shell=shell)["data"]["chop-generated-remote_servers.xml"] + remote_servers = get_remote_servers_xml(chi, shell=shell) chi_start = remote_servers.find(f"<{cluster}>") chi_end = remote_servers.find(f"") @@ -422,6 +422,218 @@ def get_replicas_from_remote_servers(chi, cluster, shell=None): return chi_replicas // chi_shards +def get_remote_servers_xml(chi, shell=None): + common_config = kubectl.get("configmap", f"chi-{chi}-common-configd", shell=shell) + data = common_config.get("data", {}) + + legacy = data.get("chop-generated-remote_servers.xml") + if legacy is not None: + return legacy + + fragments = get_remote_servers_fragment_configmaps(chi, shell=shell) + xml = "" + for cm in fragments: + cm_data = cm.get("data", {}) + for key in sorted(cm_data.keys()): + if key.startswith("chop-generated-remote_servers-part-") and key.endswith(".xml"): + xml += cm_data[key] + + return xml + + +def get_remote_servers_fragment_configmaps(chi, shell=None): + fragments_list = kubectl.get( + "configmap", + "", + label=f"-l clickhouse.altinity.com/chi={chi},clickhouse.altinity.com/remote-servers-shard", + shell=shell, + ok_to_fail=True, + ) + + items = fragments_list.get("items", []) if fragments_list else [] + return sorted(items, key=lambda x: x["metadata"]["name"]) + + +def get_remote_servers_fragment_names(chi, shell=None): + return [item["metadata"]["name"] for item in get_remote_servers_fragment_configmaps(chi, shell=shell)] + + +def get_remote_servers_volume_configmaps(chi, pod_name="", shell=None): + pod_spec = kubectl.get_pod_spec(chi, pod_name=pod_name, shell=shell) + names = [] + for volume in pod_spec.get("volumes", []): + config_map = volume.get("configMap") + if config_map is None: + projected = volume.get("projected") + if projected is None: + continue + for source in projected.get("sources", []): + projected_config_map = source.get("configMap") + if projected_config_map is None: + continue + projected_name = projected_config_map.get("name", "") + if "-remote-servers-shard-" in projected_name: + names.append(projected_name) + continue + + name = config_map.get("name", "") + if "-remote-servers-shard-" in name: + names.append(name) + return sorted(set(names)) + + +def get_chi_pod_names(chi, shell=None): + pods = kubectl.get( + "pod", + "", + label=f"-l clickhouse.altinity.com/chi={chi}", + ok_to_fail=True, + shell=shell, + ) + items = pods.get("items", []) if pods else [] + return sorted(item["metadata"]["name"] for item in items) + + +def get_chi_status_pod_names(chi, shell=None): + obj = kubectl.get("chi", chi, ok_to_fail=True, shell=shell) or {} + status = obj.get("status", {}) + pods = status.get("pods", []) + return sorted(pods) if isinstance(pods, list) else [] + + +def get_existing_pod_names_from_status(chi, shell=None): + existing = [] + for pod_name in get_chi_status_pod_names(chi, shell=shell): + pod = kubectl.get("pod", pod_name, ok_to_fail=True, shell=shell) + if pod: + existing.append(pod_name) + return sorted(existing) + + +def get_chi_statefulset_names(chi, shell=None): + sts = kubectl.get( + "statefulset", + "", + label=f"-l clickhouse.altinity.com/chi={chi}", + ok_to_fail=True, + shell=shell, + ) + items = sts.get("items", []) if sts else [] + return sorted(item["metadata"]["name"] for item in items) + + +def format_chi_reconcile_diagnostics(chi, shell=None): + status = kubectl.get("chi", chi, ok_to_fail=True, shell=shell) or {} + status_data = status.get("status", {}) + status_text = status_data.get("status", "") + errors = status_data.get("errors", []) + recent_errors = "\n".join(errors[-5:]) if errors else "" + return ( + f"CHI={chi} status={status_text}, " + f"hosts={status_data.get('hosts', '')}, " + f"hostsCompleted={status_data.get('hostsCompleted', '')}, " + f"pods={status_data.get('pods', [])}, " + f"existingPodsByLabel={get_chi_pod_names(chi, shell=shell)}, " + f"existingPodsByStatusName={get_existing_pod_names_from_status(chi, shell=shell)}, " + f"statefulsets={get_chi_statefulset_names(chi, shell=shell)}. " + f"Recent CHI errors:\n{recent_errors}" + ) + + +def wait_for_chi_pods(chi, expected_count, timeout=900, delay=5, shell=None): + deadline = time.time() + timeout + completed_without_pods_since = None + completed_without_pods_grace = 120 + + while time.time() < deadline: + pod_names = sorted( + set(get_chi_pod_names(chi, shell=shell)) + | set(get_existing_pod_names_from_status(chi, shell=shell)) + ) + if len(pod_names) >= expected_count: + return pod_names + + chi_status = kubectl.get_field("chi", chi, ".status.status", shell=shell) + # Allow short stabilization after Completed, because label/index propagation may lag. + if chi_status == "Completed" and len(pod_names) == 0: + now = time.time() + if completed_without_pods_since is None: + completed_without_pods_since = now + elif now - completed_without_pods_since >= completed_without_pods_grace: + assert False, error( + "CHI is Completed but no ClickHouse pods were created. " + + format_chi_reconcile_diagnostics(chi, shell=shell) + ) + else: + completed_without_pods_since = None + + time.sleep(delay) + + assert False, error( + f"Timed out waiting for {expected_count} ClickHouse pods. " + + format_chi_reconcile_diagnostics(chi, shell=shell) + ) + + +def wait_for_clickhouse_pod_ready(pod_name, timeout=300, delay=5, shell=None): + deadline = time.time() + timeout + last_state = "pod not found" + + while time.time() < deadline: + pod = kubectl.get("pod", pod_name, ok_to_fail=True, shell=shell) + if pod: + status = pod.get("status", {}) + phase = status.get("phase", "") + container_statuses = status.get("containerStatuses", []) + + # Wait for pod phase to be Running, but do not block on container readiness. + # File read verification below already retries transient exec failures. + if phase == "Running": + return + + last_state = f"phase={phase}, containerStatuses={container_statuses}" + + time.sleep(delay) + + assert False, error( + f"Timed out waiting for pod {pod_name} to be Running. " + f"Last observed state: {last_state}" + ) + + +def read_pod_file_with_retry(pod_name, file_path, timeout=120, delay=3, shell=None): + deadline = time.time() + timeout + missing_marker = "__MISSING_FILE__" + transient_errors = ( + "unable to upgrade connection", + "container not found", + "error: internal error occurred", + "error from server", + ) + + while time.time() < deadline: + output = kubectl.launch( + f'exec {pod_name} -- bash -c "cat \"{file_path}\" 2>/dev/null || echo {missing_marker}"', + ok_to_fail=True, + shell=shell, + ) + + output_lower = output.lower() + if any(text in output_lower for text in transient_errors): + time.sleep(delay) + continue + + if missing_marker in output: + time.sleep(delay) + continue + + return output + + assert False, error( + f"Timed out waiting for file {file_path} to become available in pod {pod_name}" + ) + + @TestCheck def check_remote_servers(self, chi, check_shards, check_replicas, trigger_event, shell=None, cluster=""): """Check cluster definition in configmap until signal is received""" @@ -1361,7 +1573,7 @@ def test_010013_1(self): def get_shards_from_remote_servers(chi, cluster, shell=None): if cluster == "": cluster = chi - remote_servers = kubectl.get("configmap", f"chi-{chi}-common-configd", shell=shell)["data"]["chop-generated-remote_servers.xml"] + remote_servers = get_remote_servers_xml(chi, shell=shell) chi_start = remote_servers.find(f"<{cluster}>") chi_end = remote_servers.find(f"") @@ -5322,7 +5534,7 @@ def test_010056(self): assert out != "0" with And("Replica still should be unready after reconcile timeout"): - ready = kubectl.get_field("pod", f"chi-{chi}-{cluster}-0-1-0", ".metadata.labels.clickhouse\.altinity\.com\/ready") + ready = kubectl.get_field("pod", f"chi-{chi}-{cluster}-0-1-0", r".metadata.labels.clickhouse\.altinity\.com\/ready") print(f"ready label={ready}") assert ready != "yes", error("Replica should be unready") @@ -5348,7 +5560,7 @@ def test_010056(self): with Then("Replica should become ready"): kubectl.wait_field("pod", f"chi-{chi}-{cluster}-0-1-0", - ".metadata.labels.clickhouse\.altinity\.com\/ready", value="yes") + r".metadata.labels.clickhouse\.altinity\.com\/ready", value="yes") with And("Replication delay should be zero"): out = clickhouse.query(chi, "select max(absolute_delay) from system.replicas", host=f"chi-{chi}-{cluster}-0-1-0") @@ -5634,6 +5846,134 @@ def test_010061(self): with Finally("I clean up"): delete_test_namespace() + +@TestScenario +@Name("test_010062. Test remote_servers fragment mounts and GC") +def test_010062(self): + create_shell_namespace_clickhouse_template() + chopconf_manifest = "manifests/chopconf/test-062-remote-servers-fragmentation.yaml" + + with Given("Operator configuration enables remote_servers fragmentation"): + kubectl.apply( + util.get_full_path(chopconf_manifest, lookup_in_host=False), + current().context.operator_namespace, + ) + util.restart_operator(ns=current().context.operator_namespace) + + manifest = "manifests/chi/test-062-remote-servers-fragmentation-1.yaml" + chi = yaml_manifest.get_name(util.get_full_path(manifest)) + pod_name = "" + + with Given("CHI with multiple shards is installed"): + kubectl.apply_chi(util.get_full_path(manifest, False)) + kubectl.wait_chi_status(chi, "InProgress", retries=4, throw_error=False) + pod_names = wait_for_chi_pods(chi, expected_count=4) + kubectl.wait_chi_status(chi, "Completed") + pod_name = pod_names[0] + wait_for_clickhouse_pod_ready(pod_name) + + with Then("remote_servers are fragmented into multiple ConfigMaps"): + initial_fragments = [] + for attempt in retries(timeout=120, delay=5): + with attempt: + initial_fragments = get_remote_servers_fragment_configmaps(chi) + assert len(initial_fragments) > 1, error( + "expected multiple remote_servers fragments, " + f"got {len(initial_fragments)}" + ) + + initial_fragment_names = {item["metadata"]["name"] for item in initial_fragments} + + with Then("Fragment files are mounted into pod and match ConfigMap XML"): + mounted_configmaps = set(get_remote_servers_volume_configmaps(chi, pod_name=pod_name)) + assert mounted_configmaps == initial_fragment_names + + for fragment in initial_fragments: + cm_name = fragment["metadata"]["name"] + cm_data = fragment.get("data", {}) + assert cm_name in mounted_configmaps + for file_name, expected_xml in cm_data.items(): + cat_path = f"/etc/clickhouse-server/config.d/{file_name}" + actual_xml = read_pod_file_with_retry(pod_name, cat_path) + assert actual_xml.strip() == expected_xml.strip(), error(f"mismatch for {cm_name}/{file_name}") + + with When("CHI is scaled down and rollout completes"): + kubectl.apply_chi(util.get_full_path("manifests/chi/test-062-remote-servers-fragmentation-2.yaml", False)) + kubectl.wait_chi_status(chi, "InProgress", retries=4, throw_error=False) + wait_for_chi_pods(chi, expected_count=1) + kubectl.wait_chi_status(chi, "Completed") + + with Then("Stale remote_servers fragment ConfigMaps are garbage collected"): + current_fragment_names = set(get_remote_servers_fragment_names(chi)) + stale_fragment_names = initial_fragment_names - current_fragment_names + assert len(stale_fragment_names) > 0 + + for attempt in retries(timeout=180, delay=5): + with attempt: + remaining_fragment_names = set(get_remote_servers_fragment_names(chi)) + assert len(remaining_fragment_names & stale_fragment_names) == 0 + current_fragment_names = remaining_fragment_names + + with Then("Pods do not reference deleted remote_servers ConfigMaps"): + mounted_after_rollout = set(get_remote_servers_volume_configmaps(chi, pod_name=f"chi-{chi}-default-0-0-0")) + assert mounted_after_rollout == current_fragment_names + + with Then("remote_servers XML is still valid after GC"): + assert get_shards_from_remote_servers(chi, "default") == 1 + + with Finally("I clean up"): + delete_test_namespace() + + +@TestScenario +@Name("test_010063. Test maxRemoteServersFragments reconcile failure") +def test_010063(self): + create_shell_namespace_clickhouse_template() + chopconf_manifest = "manifests/chopconf/test-063-remote-servers-max-fragments.yaml" + + with Given("Operator configuration sets very low maxRemoteServersFragments"): + kubectl.apply( + util.get_full_path(chopconf_manifest, lookup_in_host=False), + current().context.operator_namespace, + ) + util.restart_operator(ns=current().context.operator_namespace) + + manifest = "manifests/chi/test-063-remote-servers-max-fragments.yaml" + chi = yaml_manifest.get_name(util.get_full_path(manifest)) + + with When("CHI is applied"): + kubectl.apply_chi(util.get_full_path(manifest, False)) + + with Then("Reconcile fails and emits ReconcileFailed event"): + expected_error_signature = "remote_servers fragments limit exceeded" + events = None + for attempt in retries(timeout=180, delay=5): + with attempt: + events = kubectl.get( + "events", + "", + label=( + "--field-selector " + f"involvedObject.kind=ClickHouseInstallation," + f"involvedObject.name={chi}," + "reason=ReconcileFailed" + ), + ok_to_fail=True, + ) + items = events.get("items", []) if events else [] + assert len(items) > 0 + messages = [item.get("message", "") for item in items] + assert any(expected_error_signature in message.lower() for message in messages), error( + "expected ReconcileFailed event with max fragments error signature. " + f"signature='{expected_error_signature}', messages={messages}" + ) + + status = kubectl.get_field("chi", chi, ".status.status") + assert status != "Completed" + + with Finally("I clean up"): + delete_test_namespace() + # # Keeper tests section # diff --git a/tests/regression.py b/tests/regression.py index 496747970..8aae42e5b 100755 --- a/tests/regression.py +++ b/tests/regression.py @@ -1,4 +1,16 @@ #!/usr/bin/env python3 + +# Python 3.13 removed some private lzma mode constants that TestFlows 2.2.8 still references. +# Provide backward-compatible fallbacks before importing testflows. +import lzma + +if not hasattr(lzma, "_MODE_CLOSED"): + lzma._MODE_CLOSED = 0 +if not hasattr(lzma, "_MODE_READ"): + lzma._MODE_READ = 1 +if not hasattr(lzma, "_MODE_WRITE"): + lzma._MODE_WRITE = 3 + from testflows.core import * from helpers.argparser import argparser