Go 微服务可观测性:日志、指标、追踪实战

背景 微服务架构下,服务间调用链路错综复杂。一旦出问题,没有可观测性支撑,排查起来就是噩梦。 可观测性三驾马车:日志(Logs)、指标(Metrics)、追踪(Traces)。 日志:结构化日志是基础 别再用 fmt.Printf 了,结构化日志才是正道: import "github.com/rs/zerolog" func main() { log := zerolog.New(os.Stdout). With(). Timestamp(). Caller(). Logger() log.Info(). Str("service", "user-service"). Int("request_id", 12345). Msg("User login successful") } 输出: {"level":"info","service":"user-service","request_id":12345,"time":"2026-04-11T10:00:00Z","caller":"main.go:25","message":"User login successful"} 指标:Prometheus + Grafana import "github.com/prometheus/client_golang/prometheus" import "github.com/prometheus/client_golang/prometheus/promhttp" var ( httpRequests = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "http_requests_total", Help: "Total HTTP requests", }, []string{"method", "path", "status"}, ) httpDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "http_request_duration_seconds", Buckets: prometheus.DefBuckets, }, []string{"method", "path"}, ) ) func init() { prometheus.MustRegister(httpRequests, httpDuration) } // 中间件示例 func promMiddleware(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { start := time.Now() rw := &responseWriter{ResponseWriter: w, statusCode: 200} next.ServeHTTP(rw, r) duration := time.Since(start).Seconds() httpRequests.WithLabelValues(r.Method, r.URL.Path, strconv.Itoa(rw.statusCode)).Inc() httpDuration.WithLabelValues(r.Method, r.URL.Path).Observe(duration) }) } 分布式追踪:OpenTelemetry import "go.opentelemetry.io/otel" import "go.opentelemetry.io/otel/exporters/jaeger" import "go.opentelemetry.io/otel/sdk/trace" func initTracer() (func(), error) { exp, err := jaeger.New(jaeger.WithAgentEndpoint()) if err != nil { return nil, err } tp := trace.NewTracerProvider( trace.WithBatcher(exp), trace.WithSampler(trace.AlwaysSample()), ) otel.SetTracerProvider(tp) return func() { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() 
tp.Shutdown(ctx) }, nil } // 在 HTTP handler 中使用 func handleGetUser(w http.ResponseWriter, r *http.Request) { ctx, span := otel.Tracer("user-service").Start(r.Context(), "GetUser") defer span.End() span.SetAttributes( attribute.String("user.id", r.URL.Query().Get("id")), ) user, err := getUserFromDB(ctx, r.URL.Query().Get("id")) if err != nil { span.RecordError(err) // ... } // 传递给后续调用 go someAsyncOperation(ctx, user) } 三者结合:一个完整示例 type UserService struct { logger zerolog.Logger tracer trace.Tracer metrics *UserMetrics userRepo *UserRepository } func (s *UserService) GetUser(ctx context.Context, id string) (*User, error) { // 1. 开始追踪 ctx, span := s.tracer.Start(ctx, "UserService.GetUser") defer span.End() span.SetAttributes(attribute.String("user.id", id)) // 2. 记录指标 s.metrics.requests.Inc() timer := s.metrics.duration.NewTimer() // 3. 结构化日志 s.logger.Info(). Str("user_id", id). Str("trace_id", span.SpanContext().TraceID().String()). Msg("Fetching user") // 4. 业务逻辑 user, err := s.userRepo.FindByID(ctx, id) if err != nil { // 记录错误,包含追踪上下文 s.logger.Error(). Err(err). Str("user_id", id). Str("trace_id", span.SpanContext().TraceID().String()). Msg("Failed to fetch user") span.RecordError(err) s.metrics.errors.Inc() return nil, err } timer.ObserveDuration() return user, nil } 可视化:用 Grafana 大盘 常见 Dashboard 布局: ...

2026年4月11日 · 2 分钟 · BvBeJ

Go 并发模式:Pipeline 实战

背景 Go 的并发模型是其最强大的特性之一。goroutine + channel 的组合让我们能以极低的成本构建高性能的并发系统。 今天聊聊 Pipeline 模式——一种将数据处理流程抽象为一系列阶段的编程范式。 什么是 Pipeline 想象工厂流水线:原料从一端进入,经过多个工序处理,最终成品从另一端出来。 func main() { // 生成数据 data := generate(1, 2, 3, 4, 5) // 流水线:平方 -> 过滤偶数 -> 输出 result := pipeline(data, square, filterEven, printResult, ) <-result.done // 等待完成 } 实战:图片处理流水线 假设我们要处理一批图片:下载 → 缩放 → 添加水印 → 上传。 type Image struct { URL string Data []byte } func ProcessImages(urls []string) error { downloads := make(chan Image, 100) resized := make(chan Image, 100) watermarked := make(chan Image, 100) var wg sync.WaitGroup // 下载阶段 wg.Add(1) go func() { defer wg.Done() for _, url := range urls { img, err := download(url) if err != nil { log.Printf("下载失败: %v", err) continue } downloads <- img } close(downloads) }() // 缩放阶段 (3个worker) for i := 0; i < 3; i++ { wg.Add(1) go func() { defer wg.Done() for img := range downloads { resizedImg, _ := resize(img, 800, 600) resized <- resizedImg } }() } // 水印阶段 (2个worker) for i := 0; i < 2; i++ { wg.Add(1) go func() { defer wg.Done() for img := range resized { watermarkedImg, _ := watermark(img, "© My Blog") watermarked <- watermarkedImg } }() } // 上传阶段 wg.Add(1) go func() { defer wg.Done() for img := range watermarked { if err := upload(img); err != nil { log.Printf("上传失败: %v", err) } } }() wg.Wait() return nil } 优雅的错误处理 Pipeline 中如何处理错误?一个不错的方案是用错误 channel: ...

2026年4月10日 · 2 分钟 · BvBeJ

Kubernetes Operator 开发实战:用 Go 告别手动运维

背景 Kubernetes Operator 是 CNCF 主推的云原生扩展机制。用 Go 写 Operator 是我日常工作的重要部分。 这篇文章聊聊怎么从零开发一个生产级的 Operator。 核心概念 Operator 核心是声明式 API + reconciliation loop: 用户声明期望状态 → Controller 调和 → 实际状态趋近期望 项目结构 my-operator/ ├── main.go ├── api/ │ └── v1/ │ └── myapp_types.go # CRD 定义 ├── controllers/ │ └── myapp_controller.go # Reconciliation 逻辑 └── config/ ├── crd/ └── rbac/ 第一步:定义 CRD (Custom Resource Definition) // api/v1/myapp_types.go package v1 import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) type MyAppSpec struct { Replicas int32 `json:"replicas,omitempty"` Image string `json:"image"` Port int32 `json:"port"` EnvVars []EnvVar `json:"envVars,omitempty"` } type EnvVar struct { Name string `json:"name"` Value string `json:"value"` } type MyAppStatus struct { AvailableReplicas int32 `json:"availableReplicas,omitempty"` Conditions []metav1.Condition `json:"conditions,omitempty"` } // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:shortName=myapp type MyApp struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec MyAppSpec `json:"spec,omitempty"` Status MyAppStatus `json:"status,omitempty"` } func (r *MyApp) Hub() {} 第二步:生成代码 # 安装 controller-gen go install sigs.k8s.io/controller-tools/cmd/controller-gen@latest # 生成 DeepCopy 代码 controller-gen object:headerFile="hack/boilerplate.go.txt" paths="./..." # 生成 CRD YAML controller-gen crd:crdVersions=v1 paths="./..." output:crd:artifacts:config=config/crd/bases 第三步:实现 Controller // controllers/myapp_controller.go package controllers type MyAppReconciler struct { Client client.Client Scheme *runtime.Scheme Log logr.Logger } func (r *MyAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := r.Log.WithValues("myapp", req.NamespacedName) // 1. 获取资源 var myapp v1.MyApp if err := r.Get(ctx, req.NamespacedName, &myapp); err != nil { return ctrl.Result{}, client.IgnoreNotFound(err) } // 2. 
构建 Deployment deploy := r.buildDeployment(&myapp) if err := ctrl.SetControllerReference(&myapp, deploy, r.Scheme); err != nil { return ctrl.Result{}, err } // 3. 创建或更新 Deployment found := &appsv1.Deployment{} err := r.Get(ctx, req.NamespacedName, found) if err != nil && errors.IsNotFound(err) { log.Info("Creating Deployment", "name", deploy.Name) err = r.Create(ctx, deploy) } else if err == nil { // 更新(需要对比 spec 差异) if !r.deploymentEqual(found, deploy) { found.Spec = deploy.Spec log.Info("Updating Deployment") err = r.Update(ctx, found) } } // 4. 更新 Status r.updateStatus(&myapp, found) return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } func (r *MyAppReconciler) buildDeployment(app *v1.MyApp) *appsv1.Deployment { replicas := app.Spec.Replicas if replicas == 0 { replicas = 1 } return &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: app.Name, Namespace: app.Namespace, }, Spec: appsv1.DeploymentSpec{ Replicas: &replicas, Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"app": app.Name}, }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": app.Name}, }, Spec: corev1.PodSpec{ Containers: []corev1.Container{{ Name: "myapp", Image: app.Spec.Image, Ports: []corev1.ContainerPort{{ ContainerPort: app.Spec.Port, }}, Env: r.buildEnvVars(app.Spec.EnvVars), }}, }, }, }, } } 第四步:启动 Controller // main.go func main() { ctrl.SetLogger(zap.New(zap.UseDevMode(true))) mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ Scheme: scheme, }) if err != nil { setupLog.Error(err, "unable to start manager") os.Exit(1) } if err = (&controllers.MyAppReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller") os.Exit(1) } if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager") os.Exit(1) } } 高级特性 1. 
Webhook 验证 // webhooks/myapp_webhook.go func (r *MyApp) ValidateCreate() error { if r.Spec.Replicas < 0 { return field.Invalid( field.NewPath("spec").Child("replicas"), r.Spec.Replicas, "replicas must be non-negative", ) } return nil } 2. Finalizer(防止误删) func (r *MyAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { myapp := &v1.MyApp{} r.Get(ctx, req.NamespacedName, myapp) // 删除标记? if myapp.DeletionTimestamp.IsZero() { // 添加 finalizer if !containsString(myapp.GetFinalizers(), "myapp.finalizer") { myapp.Finalizers = append(myapp.GetFinalizers(), "myapp.finalizer") r.Update(ctx, myapp) } } else { // 执行清理逻辑 r.cleanup(myapp) // 移除 finalizer myapp.Finalizers = removeString(myapp.GetFinalizers(), "myapp.finalizer") r.Update(ctx, myapp) } } 测试 import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) var _ = Describe("MyApp controller", func() { Context("with basic spec", func() { It("should create a Deployment", func() { myapp := &v1.MyApp{ ObjectMeta: metav1.ObjectMeta{ Name: "test", Namespace: "default", }, Spec: v1.MyAppSpec{ Replicas: 2, Image: "nginx:latest", Port: 80, }, } Expect(k8sClient.Create(ctx, myapp)).Should(Succeed()) }) }) }) 部署 Operator # config/manager/manager.yaml apiVersion: apps/v1 kind: Deployment metadata: name: my-operator spec: replicas: 1 template: spec: containers: - name: operator image: myorg/my-operator:v1.0.0 env: - name: WATCH_NAMESPACE value: "" # OLM (Operator Lifecycle Manager) 安装 operator-sdk olm install operator-sdk run bundle myorg/my-operator-bundle:v1.0.0 总结 Operator 开发的核心: ...

2026年4月4日 · 3 分钟 · BvBeJ