diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index 2d38b3103..881d5815b 100644 --- a/.github/workflows/pipeline.yml +++ b/.github/workflows/pipeline.yml @@ -6,15 +6,10 @@ on: pull_request: branches: [ pipeline-optimization ] -# Add concurrency limit to prevent resource contention -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - jobs: build-with-metrics: runs-on: ubuntu-latest - timeout-minutes: 60 # Add overall job timeout + timeout-minutes: 60 services: prometheus: @@ -26,7 +21,6 @@ jobs: --health-interval 10s --health-timeout 5s --health-retries 3 - --health-start-period 10s pushgateway: image: prom/pushgateway:latest @@ -37,14 +31,11 @@ jobs: --health-interval 10s --health-timeout 5s --health-retries 3 - --health-start-period 10s steps: - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for better metrics - # Installation and setup of monitoring tools with error handling + # Installation and setup of monitoring tools - name: Setup monitoring tools id: setup-monitoring timeout-minutes: 5 @@ -56,39 +47,18 @@ jobs: sudo apt-get install -y powerstat linux-tools-common linux-tools-generic || (echo "Failed to install powerstat and linux tools" && exit 1) echo "::endgroup::" - echo "::group::Installing PowerAPI" - sudo snap install powerapi || (echo "Failed to install PowerAPI" && exit 1) - echo "::endgroup::" - echo "::group::Setting up node exporter" curl -L --retry 3 https://github.com/prometheus/node_exporter/releases/download/v1.3.1/node_exporter-1.3.1.linux-amd64.tar.gz -o node_exporter.tar.gz || (echo "Failed to download node exporter" && exit 1) tar xvfz node_exporter.tar.gz || (echo "Failed to extract node exporter" && exit 1) echo "::endgroup::" - # Start monitoring tools with improved configuration and error handling + # Start monitoring tools with improved configuration - name: Start monitoring id: start-monitoring timeout-minutes: 2 run: | set -eo pipefail - # 
Start PowerAPI with retry mechanism - max_retries=3 - retry_count=0 - while [ $retry_count -lt $max_retries ]; do - if sudo powerapi --pid $$ --frequency 1000 --output prometheus --pushgateway-url http://localhost:9091/metrics/job/powerapi & then - echo "POWERAPI_PID=$!" >> $GITHUB_ENV - break - fi - retry_count=$((retry_count+1)) - sleep 5 - done - - if [ $retry_count -eq $max_retries ]; then - echo "Failed to start PowerAPI after $max_retries attempts" - exit 1 - fi - # Start node exporter with health check ./node_exporter-*/node_exporter --web.listen-address=":9100" & echo "NODE_EXPORTER_PID=$!" >> $GITHUB_ENV @@ -112,7 +82,7 @@ jobs: run: | set -eo pipefail start_time=$(date +%s%N) - ./mvnw -B verify -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn + ./mvnw -B verify build_status=$? end_time=$(date +%s%N) echo "BUILD_TIME=$((($end_time - $start_time)/1000000))" >> $GITHUB_ENV @@ -120,12 +90,12 @@ jobs: - name: Run tests id: test - if: success() || failure() # Run even if build fails + if: success() || failure() timeout-minutes: 20 run: | set -eo pipefail start_time=$(date +%s%N) - ./mvnw test -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn + ./mvnw test test_status=$? end_time=$(date +%s%N) echo "TEST_TIME=$((($end_time - $start_time)/1000000))" >> $GITHUB_ENV @@ -160,7 +130,6 @@ jobs: start_time=$(date +%s%N) kubectl apply -f k8s/ || (echo "Failed to apply Kubernetes manifests" && exit 1) - # Wait for deployment with proper error handling if ! 
kubectl wait --for=condition=ready pod -l app=petclinic --timeout=180s; then echo "::error::Deployment failed - collecting debug information" kubectl describe pods -l app=petclinic @@ -171,14 +140,14 @@ end_time=$(date +%s%N) echo "DEPLOY_TIME=$((($end_time - $start_time)/1000000))" >> $GITHUB_ENV - # Export metrics with improved error handling + # Export metrics with proper function definition - name: Export metrics to Prometheus - if: always() # Run even if previous steps failed + if: always() timeout-minutes: 5 run: | set -eo pipefail - # Function to safely export metric + # Define the export_metric function export_metric() { local metric_name=$1 local metric_value=$2 @@ -197,23 +166,28 @@ export_metric "pipeline_test_duration_ms" "${TEST_TIME}" "test" export_metric "pipeline_docker_build_duration_ms" "${DOCKER_BUILD_TIME}" "docker-build" export_metric "pipeline_deploy_duration_ms" "${DEPLOY_TIME}" "deploy" - - # Export power consumption metrics with error handling - if [ -f energy_metrics.csv ]; then - while IFS=, read -r timestamp watts; do - export_metric "power_consumption_watts" "$watts" "power" || continue - done < energy_metrics.csv - else - echo "::warning::energy_metrics.csv not found" - fi - # Collect additional resource metrics + # Collect additional resource metrics (export_metric is redefined below because each step runs in a fresh shell) - name: Collect resource metrics if: always() timeout-minutes: 2 run: | set -eo pipefail + # Define the export_metric function again since it's a new shell context + export_metric() { + local metric_name=$1 + local metric_value=$2 + local stage=$3 + + if [ -n "$metric_value" ]; then + echo "${metric_name}{stage=\"${stage}\",project=\"petclinic\"} ${metric_value}" | \ + curl --retry 3 --retry-delay 2 --max-time 10 --silent --show-error \ + --data-binary @- http://localhost:9091/metrics/job/petclinic-pipeline || \ + echo "::warning::Failed to export ${metric_name} for ${stage}" + fi + } + # Memory usage metric with error handling mem_usage=$(free -b | 
grep Mem: | awk '{print $3}') || echo "::warning::Failed to collect memory usage" if [ -n "$mem_usage" ]; then @@ -226,7 +200,7 @@ jobs: export_metric "pipeline_cpu_usage_percent" "$cpu_usage" "cpu" fi - # Stop monitoring tools and collect metrics + # Stop node exporter and collect final metrics - name: Collect final metrics if: always() timeout-minutes: 5 @@ -236,11 +210,7 @@ jobs: # End timestamp date +%s%N > pipeline_end_time.txt - # Stop monitoring processes safely - if [ -n "$POWERAPI_PID" ]; then - sudo kill $POWERAPI_PID || echo "::warning::Failed to stop PowerAPI" - fi - + # Stop node exporter if [ -n "$NODE_EXPORTER_PID" ]; then kill $NODE_EXPORTER_PID || echo "::warning::Failed to stop node exporter" fi @@ -268,7 +238,6 @@ jobs: with: name: pipeline-metrics path: | - energy_metrics.csv system_metrics.txt memory_metrics.txt disk_metrics.txt