diff --git a/README.md b/README.md index 45b1357..b2adf3e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Flow Helm Chart -A Helm chart for deploying the Flow workflow engine platform to Kubernetes. +A Helm chart for deploying the Flow workflow automation platform to Kubernetes. ## Overview @@ -9,7 +9,7 @@ Flow is a distributed workflow automation platform consisting of: - **Core Services**: Workflow Engine, Activity Registry, Definition Store, Workflow Logging, Connection Store, Tenant Registry - **Frontend**: Blazor WebAssembly web application - **Activity Services**: 23+ activity implementations for various integrations (HTTP, SQL, Azure, AWS, etc.) -- **Infrastructure**: RabbitMQ for messaging, PostgreSQL/SQL Server for persistence +- **Infrastructure**: RabbitMQ for messaging, PostgreSQL for persistence, Redis for caching (optional) ## Prerequisites @@ -19,70 +19,47 @@ Flow is a distributed workflow automation platform consisting of: ## Quick Start -### Install from Gitea Helm Repository - -The Flow Helm chart is published to the Gitea Package Registry at `https://git.kn.entit.eu`. +### Add the Helm Repository ```bash -# Add the Helm repository (requires authentication for private repos) -helm repo add entit-flow https://git.kn.entit.eu/api/packages/EntitAB/helm \ - --username YOUR_GITEA_USERNAME \ - --password YOUR_GITEA_TOKEN +# Add the Entit Helm repository +helm repo add entit https://git.kn.entit.eu/EntitAB/Helm-Charts/raw/branch/main # Update repository cache helm repo update # Search for available versions -helm search repo entit-flow/flow --versions +helm search repo entit/flow --versions +``` -# Install the chart -helm install flow entit-flow/flow \ +### Install the Chart + +```bash +# Install with default values +helm install flow entit/flow \ + --namespace flow \ + --create-namespace + +# Install with custom values file +helm install flow entit/flow \ --namespace flow \ --create-namespace \ -f values.yaml ``` -### Install from Local Source +### Using Example Values Files + +Example values files are available in the `examples/` directory: ```bash -# Add Helm Dependencies -cd helm/flow -helm dependency update +# Download example values for production +curl -O https://git.kn.entit.eu/EntitAB/Helm-Charts/raw/branch/main/examples/values-prod.yaml -# Install for Development -helm install flow ./helm/flow -f ./helm/flow/values-dev.yaml +# Download example values for development +curl -O https://git.kn.entit.eu/EntitAB/Helm-Charts/raw/branch/main/examples/values-dev.yaml -# Install for Production -helm install flow ./helm/flow \ - -f ./helm/flow/values-prod.yaml \ - --set global.azureAd.tenantId=YOUR_TENANT_ID \ - --set global.azureAd.clientId=YOUR_CLIENT_ID \ - --set global.azureAd.clientSecret=YOUR_CLIENT_SECRET -``` - -## Helm Repository Setup - -### Using in Kubernetes (from Gitea Registry) - -After the chart is published to Gitea, you can install it in any Kubernetes cluster: - -```bash -# 1. Add the Gitea Helm repository -helm repo add entit-flow https://git.kn.entit.eu/api/packages/EntitAB/helm \ - --username $GITEA_USER \ - --password $GITEA_TOKEN - -# 2. Update repositories -helm repo update - -# 3. Install the chart (development) -helm install flow entit-flow/flow \ - --namespace flow \ - --create-namespace \ - -f values-dev.yaml - -# 4. 
Install the chart (production) -helm install flow entit-flow/flow \ +# Install with production values +helm install flow entit/flow \ --namespace flow \ --create-namespace \ -f values-prod.yaml \ @@ -90,136 +67,27 @@ helm install flow entit-flow/flow \ --set global.azureAd.clientId=YOUR_CLIENT_ID ``` -### Using with ArgoCD - -Create an ArgoCD Application that references the Gitea Helm repository: - -```yaml -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: flow - namespace: argocd -spec: - project: default - source: - repoURL: https://git.kn.entit.eu/api/packages/EntitAB/helm - chart: flow - targetRevision: 0.1.0 - helm: - valueFiles: - - values-prod.yaml - parameters: - - name: global.azureAd.tenantId - value: YOUR_TENANT_ID - - name: global.azureAd.clientId - value: YOUR_CLIENT_ID - destination: - server: https://kubernetes.default.svc - namespace: flow - syncPolicy: - automated: - prune: true - selfHeal: true - syncOptions: - - CreateNamespace=true ---- -# Repository credentials secret for ArgoCD -apiVersion: v1 -kind: Secret -metadata: - name: gitea-helm-repo - namespace: argocd - labels: - argocd.argoproj.io/secret-type: repository -stringData: - type: helm - url: https://git.kn.entit.eu/api/packages/EntitAB/helm - username: YOUR_GITEA_USER - password: YOUR_GITEA_TOKEN -``` - -### Using with Flux CD - -```yaml -apiVersion: source.toolkit.fluxcd.io/v1beta2 -kind: HelmRepository -metadata: - name: entit-flow - namespace: flux-system -spec: - interval: 1h - url: https://git.kn.entit.eu/api/packages/EntitAB/helm - secretRef: - name: gitea-helm-auth ---- -apiVersion: v1 -kind: Secret -metadata: - name: gitea-helm-auth - namespace: flux-system -stringData: - username: YOUR_GITEA_USER - password: YOUR_GITEA_TOKEN ---- -apiVersion: helm.toolkit.fluxcd.io/v2beta1 -kind: HelmRelease -metadata: - name: flow - namespace: flow -spec: - interval: 5m - chart: - spec: - chart: flow - version: "0.1.0" - sourceRef: - kind: HelmRepository - name: entit-flow - namespace: flux-system - valuesFrom: - - kind: ConfigMap - name: flow-values - valuesKey: values.yaml -``` - -### Publishing Charts (CI/CD) - -The chart is automatically published to Gitea when changes are pushed to the `main` branch. The CI pipeline: - -1. Lints and validates the chart -2. Runs unit tests -3. Packages the chart -4. 
Pushes to Gitea Package Registry at `https://git.kn.entit.eu/api/packages/EntitAB/helm`
-
-To publish manually:
-
-```bash
-# Set credentials
-export GITEA_USER=your-username
-export GITEA_TOKEN=your-token
-
-# Package and push
-make push-gitea
-```
-
-**Required GitHub Secrets for CI:**
-- `GITEA_USER` - Gitea username
-- `GITEA_TOKEN` - Gitea personal access token with `write:package` scope
-
 ## Configuration
 
 ### Global Configuration
 
 | Parameter | Description | Default |
 |-----------|-------------|---------|
-| `global.imageRegistry` | Container registry for all images | `""` |
+| `global.imageRegistry` | Container registry for all images | `cr.kn.entit.eu` |
 | `global.imagePullSecrets` | Image pull secrets | `[]` |
 | `global.azureAd.enabled` | Enable Azure AD authentication | `true` |
 | `global.azureAd.tenantId` | Azure AD tenant ID | `""` |
 | `global.azureAd.clientId` | Azure AD application client ID | `""` |
 | `global.database.provider` | Database provider (Postgres/SqlServer) | `Postgres` |
-| `global.rabbitmq.host` | RabbitMQ host | `{{ .Release.Name }}-rabbitmq` |
+
+### Service URLs
+
+All internal services communicate using the full Kubernetes FQDN format:
+```
+http://<service-name>.<namespace>.svc.cluster.local:<port>
+```
+
+This ensures reliable service resolution even when components are deployed in different namespaces.
 
 ### Core Services
 
@@ -252,156 +120,73 @@ awsS3Activity:
   enabled: false
 ```
 
-### Database Configuration
+## External Managed Services
 
-#### Using Built-in PostgreSQL
+For production deployments, use external managed services instead of the built-in infrastructure.
+
+### External PostgreSQL
+
+Supports Azure Database for PostgreSQL, AWS RDS, Google Cloud SQL, and other managed PostgreSQL services.
 
 ```yaml
-postgresql:
-  enabled: true
-  auth:
-    username: flow
-    password: your-password
-    database: flow
-```
-
-#### Using External PostgreSQL
-
-```yaml
-postgresql:
-  enabled: false
-
 global:
   database:
-    provider: Postgres
+    provider: "Postgres"
    postgres:
-      host: your-postgres-host.postgres.database.azure.com
+      external: true
+      host: "myserver.postgres.database.azure.com"
      port: 5432
-      database: flow
-      username: flow
-      existingSecret: your-db-secret
-      existingSecretKey: password
-```
+      database: "flow_prod"
+      username: "flow@myserver" # Azure format: user@server
+      existingSecret: "flow-db-secret"
+      existingSecretKey: "postgres-password"
+      sslMode: "require"
+      pooling:
+        minSize: 10
+        maxSize: 200
 
-#### Using SQL Server
-
-```yaml
 postgresql:
-  enabled: false
-
-global:
-  database:
-    provider: SqlServer
-  sqlServer:
-    connectionString: "Server=your-server;Database=flow;User Id=flow;Password=xxx;"
+  enabled: false # Disable built-in PostgreSQL
 ```
 
-### RabbitMQ Configuration
+### External RabbitMQ
 
-#### Using Built-in RabbitMQ
+Supports CloudAMQP, Amazon MQ, and self-hosted clusters.
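+
+The external-service examples in this section reference pre-created Kubernetes secrets for credentials (`flow-db-secret`, `flow-rabbitmq-secret`). As a minimal illustration only (the secret and key names simply follow the examples here; adjust names, namespace, and values to your environment), such secrets can be created with:
+
+```bash
+# Create the credential secrets referenced via existingSecret / existingSecretKey
+kubectl create secret generic flow-db-secret \
+  --namespace flow \
+  --from-literal=postgres-password='YOUR_DB_PASSWORD'
+
+kubectl create secret generic flow-rabbitmq-secret \
+  --namespace flow \
+  --from-literal=rabbitmq-password='YOUR_RABBITMQ_PASSWORD'
+```
+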
```yaml -rabbitmq: - enabled: true - auth: - username: flow - password: your-password -``` - -#### Using External RabbitMQ - -```yaml -rabbitmq: - enabled: false - global: rabbitmq: - host: your-rabbitmq-host - username: flow - existingSecret: rabbitmq-secret - existingSecretKey: password -``` - -## Ingress Configuration - -### NGINX Ingress with TLS - -```yaml -frontendWeb: - ingress: - enabled: true - className: nginx - annotations: - nginx.ingress.kubernetes.io/ssl-redirect: "true" - cert-manager.io/cluster-issuer: letsencrypt-prod - hosts: - - host: flow.example.com - paths: - - path: / - pathType: Prefix + external: true + host: "xyz.rmq.cloudamqp.com" + port: 5672 + username: "flow" + existingSecret: "flow-rabbitmq-secret" + existingSecretKey: "rabbitmq-password" + vhost: "/" tls: - - secretName: flow-tls - hosts: - - flow.example.com + enabled: true + +rabbitmq: + enabled: false # Disable built-in RabbitMQ ``` -## Security +### External Redis -### Pod Security +Supports Azure Cache for Redis, Amazon ElastiCache, Redis Cloud, and self-hosted Redis. +#### Standalone Mode ```yaml -podSecurityContext: - fsGroup: 1000 - runAsNonRoot: true +global: + redis: + enabled: true + external: true + mode: "standalone" + host: "myredis.redis.cache.windows.net" + port: 6380 + existingSecret: "flow-redis-secret" + existingSecretKey: "redis-password" + tls: + enabled: true -securityContext: - runAsNonRoot: true - runAsUser: 1000 - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true -``` - -### Network Policies - -Enable network policies for production: - -```yaml -networkPolicy: - enabled: true -``` - -## Upgrading - -```bash -helm upgrade flow ./helm/flow -f values-prod.yaml -``` - -## Uninstalling - -```bash -helm uninstall flow -``` - -**Note**: This will not delete PVCs. To completely remove data: - -```bash -kubectl delete pvc -l app.kubernetes.io/instance=flow -``` - -## Building Docker Images - -Each service has a Dockerfile. Build all images: - -```bash -# Build all services -for service in WorkflowEngine ActivityRegistry DefinitionStore WorkflowLogging ConnectionStore TenantRegistry; do - docker build -t niblo/flow-${service}:latest -f $service/Dockerfile . 
-done - -# Push to Docker Hub -for service in WorkflowEngine ActivityRegistry DefinitionStore WorkflowLogging ConnectionStore TenantRegistry; do - docker push niblo/flow-${service}:latest -done +redis: + enabled: false # Disable built-in Redis diff --git a/examples/values-prod.yaml b/examples/values-prod.yaml index a07b1b3..bb1e5d0 100644 --- a/examples/values-prod.yaml +++ b/examples/values-prod.yaml @@ -15,23 +15,139 @@ global: clientSecret: "" # Set via --set or external secret scopes: "access_as_user" + # ========================================================================== + # External PostgreSQL (Managed Service) + # ========================================================================== + # Example: Azure Database for PostgreSQL, AWS RDS, Google Cloud SQL database: provider: "Postgres" postgres: + # Use external managed PostgreSQL + external: true + # PostgreSQL host FQDN + # Azure: myserver.postgres.database.azure.com + # AWS RDS: mydb.xxxx.region.rds.amazonaws.com + # GCP: /cloudsql/project:region:instance (for Cloud SQL Proxy) host: "your-postgres-host.postgres.database.azure.com" port: 5432 database: "flow_prod" + # Azure PostgreSQL username format: username@servername + # AWS/GCP: just the username username: "flow@your-postgres-host" + # Use Kubernetes secret for password (recommended for production) existingSecret: "flow-db-secret" existingSecretKey: "postgres-password" + # SSL mode - use 'require' or 'verify-full' for production + sslMode: "require" + # Additional connection parameters + extraParams: "" + # Connection pool settings for production workloads + pooling: + minSize: 10 + maxSize: 200 + connectionLifetime: 3600 + connectionIdleLifetime: 300 + # ========================================================================== + # External RabbitMQ (Managed Service) + # ========================================================================== + # Example: CloudAMQP, Amazon MQ, or self-hosted cluster rabbitmq: - host: "your-rabbitmq-host" + # Use external managed RabbitMQ + external: true + # RabbitMQ host FQDN + # CloudAMQP: xyz.rmq.cloudamqp.com + # Amazon MQ: b-xxxx.mq.region.amazonaws.com + host: "your-rabbitmq-host.cloudamqp.com" + # Additional cluster hosts (for HA setups) + additionalHosts: [] + # - "rabbitmq-node-2.example.com" + # - "rabbitmq-node-3.example.com" + port: 5672 + managementPort: 15672 username: "flow" + # Use Kubernetes secret for password (recommended for production) existingSecret: "flow-rabbitmq-secret" existingSecretKey: "rabbitmq-password" + vhost: "/" + prefetch: 10 # Higher prefetch for production throughput + # Exchange names + activityExchange: "workflow.activities" + completedExchange: "workflow.completed" + workflowEventsExchange: "workflow.logging" + # TLS settings (enable for managed services) + tls: + enabled: true + insecureSkipVerify: false + # ========================================================================== + # External Redis (Managed Service) + # ========================================================================== + # Example: Azure Cache for Redis, Amazon ElastiCache, Redis Cloud + redis: + # Enable Redis for caching and distributed locking + enabled: true + # Use external managed Redis + external: true + + # Redis mode: standalone, sentinel, or cluster + # Azure Cache for Redis: use 'standalone' for Basic/Standard, 'cluster' for Premium/Enterprise clustered + # Amazon ElastiCache: use 'standalone' for single node, 'cluster' for cluster mode enabled + mode: "standalone" # or "cluster" for clustered 
deployments + + # --- Standalone mode configuration --- + # Azure Cache for Redis: myredis.redis.cache.windows.net + # Amazon ElastiCache: my-cluster.xxxx.cache.amazonaws.com + host: "your-redis.redis.cache.windows.net" + port: 6380 # Azure uses 6380 for TLS, ElastiCache uses 6379 + + # --- Cluster mode configuration --- + # Uncomment and configure for Redis Cluster mode + # cluster: + # nodes: + # - "your-redis.redis.cache.windows.net:6380" + # maxRedirects: 3 + + # --- Sentinel mode configuration --- + # Uncomment and configure for Redis Sentinel + # sentinel: + # masterName: "mymaster" + # nodes: + # - "sentinel-0:26379" + # - "sentinel-1:26379" + # - "sentinel-2:26379" + + # Authentication + # Azure Cache for Redis requires username (default is empty or access key name) + username: "" + # Use Kubernetes secret for password/access key (recommended for production) + existingSecret: "flow-redis-secret" + existingSecretKey: "redis-password" + + # Database index (0-15, only for standalone/sentinel mode) + database: 0 + + # TLS settings (required for Azure Cache for Redis) + tls: + enabled: true + insecureSkipVerify: false + + # Connection settings optimized for production + connectTimeout: 5000 + syncTimeout: 5000 + keepAlive: 60 + retry: + maxAttempts: 3 + baseDelay: 100 + maxDelay: 5000 + pool: + minSize: 10 + maxSize: 100 + +# ============================================================================= # Core services - production replicas with autoscaling +# ============================================================================= + workflowEngine: replicaCount: 3 autoscaling: @@ -146,7 +262,10 @@ frontendWeb: hosts: - flow.your-domain.com +# ============================================================================= # Activity services - production resources +# ============================================================================= + activities: resources: limits: @@ -248,17 +367,28 @@ awsSqsActivity: enabled: true replicaCount: 1 -# External infrastructure in production (managed services) +m365Activity: + enabled: true + replicaCount: 1 + +# ============================================================================= +# Infrastructure - Disabled (using external managed services) +# ============================================================================= + +# Internal deployments disabled - using external managed services rabbitmq: - enabled: false # Use Azure Service Bus or managed RabbitMQ + enabled: false postgresql: - enabled: false # Use Azure Database for PostgreSQL + enabled: false redis: - enabled: false # Use Azure Cache for Redis if needed + enabled: false + +# ============================================================================= +# Production Security +# ============================================================================= -# Production security podSecurityContext: fsGroup: 1000 runAsNonRoot: true diff --git a/examples/values.yaml b/examples/values.yaml index 7e5fb46..1e66a59 100644 --- a/examples/values.yaml +++ b/examples/values.yaml @@ -27,17 +27,20 @@ global: # -- SQL Server connection string (when provider is SqlServer) sqlServer: connectionString: "" + # -- Use existing secret for connection string + existingSecret: "" + existingSecretKey: "connection-string" # -- PostgreSQL configuration (when provider is Postgres) postgres: - # -- Use external PostgreSQL instance (set to true if PostgreSQL is already deployed in cluster or external) + # -- Use external PostgreSQL instance (set to true for managed services like Azure Database for 
PostgreSQL, AWS RDS, etc.) external: false - # -- PostgreSQL host (single host or primary for HA, ignored if external=false and internal deployment is used) + # -- PostgreSQL host (FQDN for managed services, e.g., myserver.postgres.database.azure.com) host: "postgresql" # -- PostgreSQL port port: 5432 # -- Database name database: "flow" - # -- Database username + # -- Database username (for Azure, use format: username@servername) username: "flow" # -- Database password (ignored if existingSecret is set) password: "" @@ -45,17 +48,29 @@ global: existingSecret: "" # -- Key in existing secret containing the password existingSecretKey: "postgres-password" - # -- SSL mode: disable, require, verify-ca, verify-full + # -- SSL mode: disable, allow, prefer, require, verify-ca, verify-full sslMode: "prefer" - # -- Additional connection parameters + # -- Additional connection parameters (e.g., "sslrootcert=/path/to/ca.crt&application_name=flow") extraParams: "" + # -- Connection pool settings + pooling: + # -- Minimum pool size + minSize: 5 + # -- Maximum pool size + maxSize: 100 + # -- Connection lifetime in seconds (0 = infinite) + connectionLifetime: 0 + # -- Connection idle timeout in seconds + connectionIdleLifetime: 300 # -- RabbitMQ configuration rabbitmq: - # -- Use external RabbitMQ instance + # -- Use external RabbitMQ instance (set to true for managed services like CloudAMQP, Amazon MQ, etc.) external: false - # -- RabbitMQ host (single host or first node for HA, ignored if external=false) + # -- RabbitMQ host (FQDN for managed services) host: "rabbitmq" + # -- Additional hosts for cluster connection (comma-separated or list) + additionalHosts: [] # -- RabbitMQ AMQP port port: 5672 # -- RabbitMQ management port @@ -83,41 +98,114 @@ global: enabled: false # -- Skip TLS certificate verification (not recommended for production) insecureSkipVerify: false + # -- Path to CA certificate (for self-signed certs) + caCertPath: "" + # -- Use existing secret containing TLS certificates + existingSecret: "" + # -- Key in existing secret containing CA certificate + caCertKey: "ca.crt" # -- Redis configuration redis: # -- Enable Redis (for caching and distributed locking) enabled: false - # -- Use external Redis instance + # -- Use external Redis instance (set to true for managed services like Azure Cache for Redis, Amazon ElastiCache, etc.) 
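+    # Illustrative note: for a managed service such as Azure Cache for Redis you would
+    # typically combine external: true with mode: "standalone", port: 6380 and
+    # tls.enabled: true, as shown in examples/values-prod.yaml.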
     external: false
-    # -- Redis host (single host or sentinel master name for HA)
+    # -- Redis deployment mode: standalone, sentinel, or cluster
+    mode: "standalone"
+
+    # -- Standalone/Sentinel configuration
+    # -- Redis host (single host for standalone, master name for sentinel, ignored for cluster mode)
     host: "redis"
-    # -- Redis port (standard port or sentinel port)
+    # -- Redis port (6379 for standalone, 26379 for sentinel)
     port: 6379
+
+    # -- Redis Sentinel configuration (when mode=sentinel)
+    sentinel:
+      # -- Sentinel master name
+      masterName: "mymaster"
+      # -- Sentinel nodes (list of host:port)
+      nodes: []
+      # Example:
+      # - "sentinel-0.sentinel:26379"
+      # - "sentinel-1.sentinel:26379"
+      # - "sentinel-2.sentinel:26379"
+      # -- Sentinel password (if different from Redis password)
+      password: ""
+      # -- Use existing secret for sentinel password
+      existingSecret: ""
+      existingSecretKey: "sentinel-password"
+
+    # -- Redis Cluster configuration (when mode=cluster)
+    cluster:
+      # -- Cluster nodes (list of host:port for cluster endpoints)
+      nodes: []
+      # Example for Azure Cache for Redis (clustered):
+      # - "myredis.redis.cache.windows.net:6380"
+      # Example for Amazon ElastiCache cluster:
+      # - "cluster-endpoint.cache.amazonaws.com:6379"
+      # Example for self-hosted cluster:
+      # - "redis-node-0:6379"
+      # - "redis-node-1:6379"
+      # - "redis-node-2:6379"
+      # -- Maximum number of redirect hops
+      maxRedirects: 3
+
+    # -- Redis username (Redis 6+ ACL, required for Azure Cache for Redis)
+    username: ""
     # -- Redis password (ignored if existingSecret is set)
     password: ""
     # -- Use existing secret for password
     existingSecret: ""
     # -- Key in existing secret containing the password
     existingSecretKey: "redis-password"
-    # -- Redis database index
+    # -- Redis database index (only for standalone/sentinel, not supported in cluster mode)
     database: 0
-    # -- Use TLS for connection
+
+    # -- TLS configuration
     tls:
+      # -- Enable TLS (required for most managed Redis services)
      enabled: false
-    # -- Connection timeout in milliseconds
+      # -- Skip TLS certificate verification (not recommended for production)
+      insecureSkipVerify: false
+      # -- Path to CA certificate
+      caCertPath: ""
+      # -- Use existing secret containing TLS certificates
+      existingSecret: ""
+      caCertKey: "ca.crt"
+
+    # -- Connection settings
     connectTimeout: 5000
     # -- Sync timeout in milliseconds
     syncTimeout: 5000
+    # -- Keep alive interval in seconds (0 to disable)
+    keepAlive: 60
+    # -- Retry settings
+    retry:
+      # -- Maximum retry attempts
+      maxAttempts: 3
+      # -- Base delay between retries in milliseconds
+      baseDelay: 100
+      # -- Maximum delay between retries in milliseconds
+      maxDelay: 5000
+
+    # -- Connection pool settings
+    pool:
+      # -- Minimum number of connections to maintain
+      minSize: 5
+      # -- Maximum number of connections
+      maxSize: 50
 
-  # -- Service URLs (internal Kubernetes DNS)
+  # -- Service URLs (internal Kubernetes DNS - uses full FQDN for cross-namespace communication)
+  # Format: http://<service-name>.<namespace>.svc.cluster.local:<port>
+  # These are templated and will be resolved during Helm rendering
   serviceUrls:
-    activityRegistry: "http://{{ .Release.Name }}-activity-registry:80"
-    definitionStore: "http://{{ .Release.Name }}-definition-store:80"
-    workflowEngine: "http://{{ .Release.Name }}-workflow-engine:80"
-    workflowLogging: "http://{{ .Release.Name }}-workflow-logging:80"
-    connectionStore: "http://{{ .Release.Name }}-connection-store:80"
-    tenantRegistry: "http://{{ .Release.Name }}-tenant-registry:80"
+    activityRegistry: "http://{{ .Release.Name 
}}-activity-registry.{{ .Release.Namespace }}.svc.cluster.local:80" + definitionStore: "http://{{ .Release.Name }}-definition-store.{{ .Release.Namespace }}.svc.cluster.local:80" + workflowEngine: "http://{{ .Release.Name }}-workflow-engine.{{ .Release.Namespace }}.svc.cluster.local:80" + workflowLogging: "http://{{ .Release.Name }}-workflow-logging.{{ .Release.Namespace }}.svc.cluster.local:80" + connectionStore: "http://{{ .Release.Name }}-connection-store.{{ .Release.Namespace }}.svc.cluster.local:80" + tenantRegistry: "http://{{ .Release.Name }}-tenant-registry.{{ .Release.Namespace }}.svc.cluster.local:80" # ============================================================================= # Core Services