Add OpenTelemetry observability to Arrtrix
- Add OTLP/gRPC observability config and resource attributes
- Instrument webhook and onboarding handlers with tracing and metrics
- Add OpenTelemetry dependencies to go.mod/go.sum
- Update NixOS modules to configure observability settings
This commit is contained in:
parent
bbfe6867c8
commit
81f34676c4
11 changed files with 307 additions and 29 deletions
|
|
@ -7,11 +7,17 @@ import (
|
|||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"maunium.net/go/mautrix/bridgev2"
|
||||
"maunium.net/go/mautrix/event"
|
||||
"maunium.net/go/mautrix/format"
|
||||
"maunium.net/go/mautrix/id"
|
||||
|
||||
"sneeuwvlok/packages/arrtrix/pkg/observability"
|
||||
)
|
||||
|
||||
// ArrWebhookPath is the URL path of the Arr webhook endpoint. ServeHTTP also
// reports it as the "http.route" attribute on the request span.
const ArrWebhookPath = "/_arrtrix/webhook"
|
||||
|
|
@ -69,32 +75,65 @@ func MountArr(router *http.ServeMux, bridge *bridgev2.Bridge) error {
|
|||
}
|
||||
|
||||
func (h *ArrHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
start := time.Now()
|
||||
ctx, span := observability.StartSpan(r.Context(), "arrtrix.webhook.handle", trace.WithSpanKind(trace.SpanKindServer))
|
||||
defer span.End()
|
||||
|
||||
statusCode := http.StatusAccepted
|
||||
outcome := "ok"
|
||||
eventType := ""
|
||||
defer func() {
|
||||
observability.RecordWebhook(ctx, eventType, outcome, statusCode, time.Since(start))
|
||||
}()
|
||||
|
||||
var body payload
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
statusCode = http.StatusBadRequest
|
||||
outcome = "invalid_payload"
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
http.Error(w, "invalid webhook payload", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(body.EventType) == "" {
|
||||
statusCode = http.StatusBadRequest
|
||||
outcome = "missing_event_type"
|
||||
span.SetStatus(codes.Error, "missing eventType")
|
||||
http.Error(w, "missing eventType", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
eventType = body.EventType
|
||||
span.SetAttributes(
|
||||
attribute.String("arrtrix.webhook.event_type", body.EventType),
|
||||
attribute.String("http.method", r.Method),
|
||||
attribute.String("http.route", ArrWebhookPath),
|
||||
)
|
||||
|
||||
roomID, err := h.resolver.ResolveManagementRoom(r.Context())
|
||||
roomID, err := h.resolver.ResolveManagementRoom(ctx)
|
||||
if err != nil {
|
||||
status := http.StatusInternalServerError
|
||||
statusCode = http.StatusInternalServerError
|
||||
outcome = "resolve_failed"
|
||||
if errors.Is(err, ErrNoManagementRoom) || errors.Is(err, ErrAmbiguousManagementRoom) {
|
||||
status = http.StatusConflict
|
||||
statusCode = http.StatusConflict
|
||||
outcome = "routing_conflict"
|
||||
}
|
||||
http.Error(w, err.Error(), status)
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
http.Error(w, err.Error(), statusCode)
|
||||
return
|
||||
}
|
||||
|
||||
if err = h.sender.SendNotice(r.Context(), roomID, renderNotice(body)); err != nil {
|
||||
if err = h.sender.SendNotice(ctx, roomID, renderNotice(body)); err != nil {
|
||||
statusCode = http.StatusBadGateway
|
||||
outcome = "delivery_failed"
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
http.Error(w, "failed to deliver webhook", http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusAccepted)
|
||||
span.SetStatus(codes.Ok, "")
|
||||
w.WriteHeader(statusCode)
|
||||
}
|
||||
|
||||
type bridgeRoomResolver struct {
|
||||
|
|
@ -102,8 +141,13 @@ type bridgeRoomResolver struct {
|
|||
}
|
||||
|
||||
func (r bridgeRoomResolver) ResolveManagementRoom(ctx context.Context) (id.RoomID, error) {
|
||||
ctx, span := observability.StartSpan(ctx, "arrtrix.webhook.resolve_management_room")
|
||||
defer span.End()
|
||||
|
||||
rows, err := r.bridge.DB.Query(ctx, `SELECT mxid, management_room FROM "user" WHERE bridge_id=$1 AND management_room IS NOT NULL AND management_room <> ''`, r.bridge.ID)
|
||||
if err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return "", fmt.Errorf("failed to query management rooms: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
|
@ -113,6 +157,8 @@ func (r bridgeRoomResolver) ResolveManagementRoom(ctx context.Context) (id.RoomI
|
|||
for rows.Next() {
|
||||
var mxid, managementRoom string
|
||||
if err = rows.Scan(&mxid, &managementRoom); err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return "", fmt.Errorf("failed to scan management room: %w", err)
|
||||
}
|
||||
owners = append(owners, id.UserID(mxid))
|
||||
|
|
@ -121,15 +167,22 @@ func (r bridgeRoomResolver) ResolveManagementRoom(ctx context.Context) (id.RoomI
|
|||
}
|
||||
}
|
||||
if err = rows.Err(); err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return "", fmt.Errorf("failed to iterate management rooms: %w", err)
|
||||
}
|
||||
|
||||
switch len(owners) {
|
||||
case 0:
|
||||
span.SetStatus(codes.Error, ErrNoManagementRoom.Error())
|
||||
return "", ErrNoManagementRoom
|
||||
case 1:
|
||||
span.SetAttributes(attribute.Int("arrtrix.management_room.count", 1))
|
||||
span.SetStatus(codes.Ok, "")
|
||||
return roomID, nil
|
||||
default:
|
||||
span.SetAttributes(attribute.Int("arrtrix.management_room.count", len(owners)))
|
||||
span.SetStatus(codes.Error, ErrAmbiguousManagementRoom.Error())
|
||||
return "", fmt.Errorf("%w: %s", ErrAmbiguousManagementRoom, strings.Join(convertUserIDs(owners), ", "))
|
||||
}
|
||||
}
|
||||
|
|
@ -139,11 +192,23 @@ type bridgeNoticeSender struct {
|
|||
}
|
||||
|
||||
func (s bridgeNoticeSender) SendNotice(ctx context.Context, roomID id.RoomID, markdown string) error {
|
||||
ctx, span := observability.StartSpan(ctx, "arrtrix.webhook.send_notice")
|
||||
defer span.End()
|
||||
span.SetAttributes(attribute.String("matrix.room_id", roomID.String()))
|
||||
|
||||
if err := s.bridge.Bot.EnsureJoined(ctx, roomID); err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return err
|
||||
}
|
||||
content := format.RenderMarkdown(markdown, true, false)
|
||||
_, err := s.bridge.Bot.SendMessage(ctx, roomID, event.EventMessage, &event.Content{Parsed: &content}, nil)
|
||||
if err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
return err
|
||||
}
|
||||
span.SetStatus(codes.Ok, "")
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue