From c304c37c51d7222b80ef77bbee327664eec3e360 Mon Sep 17 00:00:00 2001 From: Tim Collins Date: Fri, 24 Jan 2025 09:37:23 +0000 Subject: [PATCH] add megalinter config. Fix my rubbish code to appease the linter Signed-off-by: Tim Collins --- scripts/release-analysis/.mega-linter.yaml | 21 ++++ scripts/release-analysis/Dockerfile | 2 +- scripts/release-analysis/README.md | 6 + .../release-analysis/fetch_helmet_releases.py | 48 +++++--- scripts/release-analysis/fetch_releases.py | 30 +++-- scripts/release-analysis/main.py | 21 ++-- scripts/release-analysis/merge_csvs.py | 103 ++++++++++++------ scripts/release-analysis/plot_graph.py | 41 ++++--- 8 files changed, 182 insertions(+), 90 deletions(-) create mode 100644 scripts/release-analysis/.mega-linter.yaml diff --git a/scripts/release-analysis/.mega-linter.yaml b/scripts/release-analysis/.mega-linter.yaml new file mode 100644 index 00000000..024abb2b --- /dev/null +++ b/scripts/release-analysis/.mega-linter.yaml @@ -0,0 +1,21 @@ +# Configuration file for MegaLinter +# Run megalinter locally with: `docker run --rm -v /var/run/docker.sock:/var/run/docker.sock:rw -v $(pwd):/tmp/lint:rw oxsecurity/megalinter:v8` +ENABLE: + - DOCKERFILE + - PYTHON + +DISABLE_LINTERS: + - PYTHON_FLAKE8 + - PYTHON_PYRIGHT + - PYTHON_RUFF + - PYTHON_BANDIT + +CLEAR_REPORT_FOLDER: true +PYTHON_PYLINT_PRE_COMMANDS: + - command: pip install -r /tmp/lint/requirements.txt + venv: pylint +REPORT_OUTPUT_FOLDER: none + + +# You might want to enable this locally to fix some stuff without guessing what you need to change. Check before committing to git. +#APPLY_FIXES: all diff --git a/scripts/release-analysis/Dockerfile b/scripts/release-analysis/Dockerfile index 4cf11225..17674252 100644 --- a/scripts/release-analysis/Dockerfile +++ b/scripts/release-analysis/Dockerfile @@ -5,4 +5,4 @@ RUN pip install --no-cache-dir -r requirements.txt COPY main.py fetch_releases.py fetch_helmet_releases.py merge_csvs.py plot_graph.py ./ RUN chmod +x main.py fetch_releases.py fetch_helmet_releases.py merge_csvs.py plot_graph.py -CMD [ "python", "./main.py" ] +CMD [ "python", "-u", "./main.py" ] diff --git a/scripts/release-analysis/README.md b/scripts/release-analysis/README.md index 1e2877d4..ab746cee 100644 --- a/scripts/release-analysis/README.md +++ b/scripts/release-analysis/README.md @@ -19,3 +19,9 @@ docker run --rm -e GITHUB_TOKEN=$GITHUB_TOKEN -v ${PWD}:/app team-helm-analysis ``` You should get 3 csvs and 4 graphs once this completes. It takes around 5 mins to run. + + +## Linting +I ran megalinter against this just to give some confidence that it's not completely broken. It's not perfect, but it's something. + +Run megalinter locally against this directory with: `docker run --rm -v /var/run/docker.sock:/var/run/docker.sock:rw -v $(pwd):/tmp/lint:rw oxsecurity/megalinter:v8` diff --git a/scripts/release-analysis/fetch_helmet_releases.py b/scripts/release-analysis/fetch_helmet_releases.py index b115985d..77e2fcb3 100644 --- a/scripts/release-analysis/fetch_helmet_releases.py +++ b/scripts/release-analysis/fetch_helmet_releases.py @@ -1,36 +1,40 @@ -import requests import csv -from datetime import datetime import os +from datetime import datetime + +import requests # GitHub repository URL repo_url = "https://api.github.com/repos/argoproj/argo-helm/releases" # Get the GitHub token from environment variables -github_token = os.getenv('GITHUB_TOKEN') +github_token = os.getenv("GITHUB_TOKEN") if not github_token: raise ValueError("GITHUB_TOKEN environment variable is not set") + # Function to fetch all releases with pagination def fetch_all_releases(url): releases = [] - headers = {'Authorization': f'token {github_token}'} + headers = {"Authorization": f"token {github_token}"} while url: response = requests.get(url, headers=headers) response.raise_for_status() releases.extend(response.json()) - url = response.links.get('next', {}).get('url') + url = response.links.get("next", {}).get("url") return releases + # Function to get the content of Chart.yaml in a release def get_chart_yaml(repo, tag, chart_path): url = f"https://raw.githubusercontent.com/{repo}/refs/tags/{tag}/charts/{chart_path}/Chart.yaml" - headers = {'Authorization': f'token {github_token}'} + headers = {"Authorization": f"token {github_token}"} response = requests.get(url, headers=headers) if response.status_code == 200: return response.text return None + # Function to extract appVersion from Chart.yaml content def extract_app_version(chart_yaml): for line in chart_yaml.splitlines(): @@ -38,41 +42,49 @@ def extract_app_version(chart_yaml): return line.split(":")[1].strip() return None + # Function to fetch releases and write to a CSV file def fetch_and_write_helmet_releases(csv_file): # Fetch all releases releases = fetch_all_releases(repo_url) # Write the release data to the CSV file - with open(csv_file, mode='w', newline='') as file: - writer = csv.writer(file, quoting=csv.QUOTE_NONE, escapechar='\\') + with open(csv_file, mode="w", newline="") as file: + writer = csv.writer(file, quoting=csv.QUOTE_NONE, escapechar="\\") writer.writerow(["Release Name", "Release Date", "Release Time", "App Version"]) for release in releases: - tag_name = release['tag_name'] - published_at = release['published_at'] + tag_name = release["tag_name"] + published_at = release["published_at"] release_date = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").date() release_time = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").time() # Extract chart path from the release name - chart_path = '-'.join(tag_name.split('-')[:-1]) - current_chart_yaml = get_chart_yaml("argoproj/argo-helm", tag_name, chart_path) + chart_path = "-".join(tag_name.split("-")[:-1]) + current_chart_yaml = get_chart_yaml( + "argoproj/argo-helm", tag_name, chart_path + ) if current_chart_yaml: current_app_version = extract_app_version(current_chart_yaml) - writer.writerow([tag_name, release_date, release_time, current_app_version]) + writer.writerow( + [tag_name, release_date, release_time, current_app_version] + ) # Read the CSV file, remove any instances of `\"`, and write back the cleaned content - with open(csv_file, mode='r') as file: + with open(csv_file, mode="r") as file: content = file.read() - cleaned_content = content.replace('\\"', '') + cleaned_content = content.replace('\\"', "") - with open(csv_file, mode='w', newline='') as file: + with open(csv_file, mode="w", newline="") as file: file.write(cleaned_content) - print(f"Release data has been written to {csv_file} and cleaned of any instances of \\\"") + print( + f'Release data has been written to {csv_file} and cleaned of any instances of \\"' + ) + # Example usage if __name__ == "__main__": - fetch_and_write_helmet_releases('argo_helm_releases.csv') + fetch_and_write_helmet_releases("argo_helm_releases.csv") diff --git a/scripts/release-analysis/fetch_releases.py b/scripts/release-analysis/fetch_releases.py index 3a36ae11..6d27faee 100644 --- a/scripts/release-analysis/fetch_releases.py +++ b/scripts/release-analysis/fetch_releases.py @@ -1,7 +1,8 @@ -import requests import csv -from datetime import datetime import os +from datetime import datetime + +import requests # List of GitHub repository URLs we care about repos = [ @@ -12,38 +13,45 @@ repos = [ ] # Get the GitHub token from environment variables -github_token = os.getenv('GITHUB_TOKEN') +github_token = os.getenv("GITHUB_TOKEN") if not github_token: raise ValueError("GITHUB_TOKEN environment variable is not set") + # Fetch all releases with pagination def fetch_all_releases(url): releases = [] - headers = {'Authorization': f'token {github_token}'} + headers = {"Authorization": f"token {github_token}"} while url: response = requests.get(url, headers=headers) response.raise_for_status() releases.extend(response.json()) - url = response.links.get('next', {}).get('url') + url = response.links.get("next", {}).get("url") return releases + # Fetch releases and write to a CSV file def fetch_and_write_releases(csv_file): - with open(csv_file, mode='w', newline='') as file: + with open(csv_file, mode="w", newline="") as file: writer = csv.writer(file) writer.writerow(["Repository", "Release Tag", "Release Date", "Release Time"]) for repo_name, repo_url in repos: releases = fetch_all_releases(repo_url) for release in releases: - tag_name = release['tag_name'] - published_at = release['published_at'] - release_date = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").date() - release_time = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").time() + tag_name = release["tag_name"] + published_at = release["published_at"] + release_date = datetime.strptime( + published_at, "%Y-%m-%dT%H:%M:%SZ" + ).date() + release_time = datetime.strptime( + published_at, "%Y-%m-%dT%H:%M:%SZ" + ).time() writer.writerow([repo_name, tag_name, release_date, release_time]) print(f"Release data has been written to {csv_file}") + # Example usage if __name__ == "__main__": - fetch_and_write_releases('argo_releases.csv') + fetch_and_write_releases("argo_releases.csv") diff --git a/scripts/release-analysis/main.py b/scripts/release-analysis/main.py index cd315765..3fcf21ed 100644 --- a/scripts/release-analysis/main.py +++ b/scripts/release-analysis/main.py @@ -1,30 +1,31 @@ -from fetch_releases import fetch_and_write_releases -from fetch_helmet_releases import fetch_and_write_helmet_releases -from merge_csvs import merge_csv_files -from plot_graph import plot_time_difference import os +from fetch_helmet_releases import fetch_and_write_helmet_releases +from fetch_releases import fetch_and_write_releases +from merge_csvs import merge_csv_files +from plot_graph import plot_time_difference + # Check there is a github token -github_token = os.getenv('GITHUB_TOKEN') +github_token = os.getenv("GITHUB_TOKEN") if not github_token: raise ValueError("GITHUB_TOKEN environment variable is not set") # Do the thing print("Fetching releases...") -fetch_and_write_releases('argo_releases.csv') +fetch_and_write_releases("argo_releases.csv") print("Done") print("Fetching Team Helmet releases...") -fetch_and_write_helmet_releases('argo_helm_releases.csv') +fetch_and_write_helmet_releases("argo_helm_releases.csv") print("Done") print("Merging release info...") -merge_csv_files('argo_releases.csv', 'argo_helm_releases.csv', 'merged_releases.csv') +merge_csv_files("argo_releases.csv", "argo_helm_releases.csv", "merged_releases.csv") print("Done") print("Plotting time difference graphs...") -plot_time_difference('merged_releases.csv') +plot_time_difference("merged_releases.csv") print("Done") # Delete __pycache__ directories -os.system('rm -rf __pycache__') +os.system("rm -rf __pycache__") diff --git a/scripts/release-analysis/merge_csvs.py b/scripts/release-analysis/merge_csvs.py index d105d3f0..72b6917b 100644 --- a/scripts/release-analysis/merge_csvs.py +++ b/scripts/release-analysis/merge_csvs.py @@ -1,70 +1,98 @@ import csv from datetime import datetime + def merge_csv_files(csv_file_1, csv_file_2, output_csv_file): # Read the first CSV file into a dictionary releases_1 = {} - with open(csv_file_1, mode='r') as file: + with open(csv_file_1, mode="r") as file: reader = csv.DictReader(file) for row in reader: - key = (row['Repository'], row['Release Tag']) + key = (row["Repository"], row["Release Tag"]) releases_1[key] = row # Read the second CSV file and find the oldest release for each appVersion oldest_releases = {} - valid_repos = {'argo-cd', 'argo-events', 'argo-workflows', 'argo-rollouts'} - with open(csv_file_2, mode='r') as file: + valid_repos = {"argo-cd", "argo-events", "argo-workflows", "argo-rollouts"} + with open(csv_file_2, mode="r") as file: reader = csv.DictReader(file) for row in reader: - release_name = row['Release Name'] - repo_name = '-'.join(release_name.split('-')[:-1]) + release_name = row["Release Name"] + repo_name = "-".join(release_name.split("-")[:-1]) if repo_name in valid_repos: - app_version = row['App Version'] - release_datetime = datetime.strptime(f"{row['Release Date']} {row['Release Time']}", "%Y-%m-%d %H:%M:%S") - if (repo_name, app_version) not in oldest_releases or release_datetime < oldest_releases[(repo_name, app_version)]['datetime']: + app_version = row["App Version"] + release_datetime = datetime.strptime( + f"{row['Release Date']} {row['Release Time']}", "%Y-%m-%d %H:%M:%S" + ) + if ( + repo_name, + app_version, + ) not in oldest_releases or release_datetime < oldest_releases[ + (repo_name, app_version) + ][ + "datetime" + ]: oldest_releases[(repo_name, app_version)] = { - 'row': row, - 'datetime': release_datetime + "row": row, + "datetime": release_datetime, } # Merge the oldest releases with the first CSV file merged_releases = [] for (repo_name, app_version), data in oldest_releases.items(): - row = data['row'] + row = data["row"] for key, release in releases_1.items(): - if repo_name == release['Repository'] and app_version == release['Release Tag']: - time_difference = data['datetime'] - datetime.strptime(f"{release['Release Date']} {release['Release Time']}", "%Y-%m-%d %H:%M:%S") - time_difference_hours = time_difference.total_seconds() / 3600 # Convert to hours + if ( + repo_name == release["Repository"] + and app_version == release["Release Tag"] + ): + time_difference = data["datetime"] - datetime.strptime( + f"{release['Release Date']} {release['Release Time']}", + "%Y-%m-%d %H:%M:%S", + ) + time_difference_hours = ( + time_difference.total_seconds() / 3600 + ) # Convert to hours merged_row = { - 'Repository': release['Repository'], - 'Release Tag': release['Release Tag'], - 'Release Date': release['Release Date'], - 'Release Time': release['Release Time'], - 'App Version': app_version, - 'Release Name': row['Release Name'], - 'Release Date 2': row['Release Date'], - 'Release Time 2': row['Release Time'], - 'Time Difference': time_difference_hours + "Repository": release["Repository"], + "Release Tag": release["Release Tag"], + "Release Date": release["Release Date"], + "Release Time": release["Release Time"], + "App Version": app_version, + "Release Name": row["Release Name"], + "Release Date 2": row["Release Date"], + "Release Time 2": row["Release Time"], + "Time Difference": time_difference_hours, } merged_releases.append(merged_row) break else: merged_row = { - 'Repository': repo_name, - 'Release Tag': '', - 'Release Date': '', - 'Release Time': '', - 'App Version': app_version, - 'Release Name': row['Release Name'], - 'Release Date 2': row['Release Date'], - 'Release Time 2': row['Release Time'], - 'Time Difference': '' + "Repository": repo_name, + "Release Tag": "", + "Release Date": "", + "Release Time": "", + "App Version": app_version, + "Release Name": row["Release Name"], + "Release Date 2": row["Release Date"], + "Release Time 2": row["Release Time"], + "Time Difference": "", } merged_releases.append(merged_row) # Write the merged data to a new CSV file - with open(output_csv_file, mode='w', newline='') as file: - fieldnames = ['Repository', 'Release Tag', 'Release Date', 'Release Time', 'App Version', 'Release Name', 'Release Date 2', 'Release Time 2', 'Time Difference'] + with open(output_csv_file, mode="w", newline="") as file: + fieldnames = [ + "Repository", + "Release Tag", + "Release Date", + "Release Time", + "App Version", + "Release Name", + "Release Date 2", + "Release Time 2", + "Time Difference", + ] writer = csv.DictWriter(file, fieldnames=fieldnames) writer.writeheader() for row in merged_releases: @@ -72,6 +100,9 @@ def merge_csv_files(csv_file_1, csv_file_2, output_csv_file): print(f"Merged data has been written to {output_csv_file}") + # Example usage if __name__ == "__main__": - merge_csv_files('argo_releases.csv', 'argo_helm_releases.csv', 'merged_releases.csv') + merge_csv_files( + "argo_releases.csv", "argo_helm_releases.csv", "merged_releases.csv" + ) diff --git a/scripts/release-analysis/plot_graph.py b/scripts/release-analysis/plot_graph.py index 69e2f233..8657fe20 100644 --- a/scripts/release-analysis/plot_graph.py +++ b/scripts/release-analysis/plot_graph.py @@ -1,17 +1,24 @@ import csv + import matplotlib.pyplot as plt from packaging import version + def plot_time_difference(csv_file): # Read the CSV file and process the data - data = {'argo-cd': [], 'argo-events': [], 'argo-workflows': [], 'argo-rollouts': []} - release_tags = {'argo-cd': [], 'argo-events': [], 'argo-workflows': [], 'argo-rollouts': []} - with open(csv_file, mode='r') as file: + data = {"argo-cd": [], "argo-events": [], "argo-workflows": [], "argo-rollouts": []} + release_tags = { + "argo-cd": [], + "argo-events": [], + "argo-workflows": [], + "argo-rollouts": [], + } + with open(csv_file, mode="r") as file: reader = csv.DictReader(file) for row in reader: - repo = row['Repository'] - time_diff_str = row['Time Difference'] - release_tag = row['Release Tag'] + repo = row["Repository"] + time_diff_str = row["Time Difference"] + release_tag = row["Release Tag"] if repo in data and time_diff_str: time_diff = float(time_diff_str) data[repo].append(time_diff) @@ -19,27 +26,33 @@ def plot_time_difference(csv_file): # Sort the release tags based on semantic versioning for repo in release_tags: - sorted_indices = sorted(range(len(release_tags[repo])), key=lambda i: version.parse(release_tags[repo][i])) + sorted_indices = sorted( + range(len(release_tags[repo])), + key=lambda i: version.parse(release_tags[repo][i]), + ) release_tags[repo] = [release_tags[repo][i] for i in sorted_indices] data[repo] = [data[repo][i] for i in sorted_indices] # Plot the data for repo, time_diffs in data.items(): plt.figure(figsize=(10, 6)) - plt.plot(release_tags[repo], time_diffs, marker='o', label=repo) - plt.axhline(y=72, color='r', linestyle='--', label='SLA (72 hours)') - plt.xlabel('Upstream Release Tag') - plt.ylabel('Time difference between upstream release and Helm Chart release (hours)') - plt.title(f'Time to Release Helm Chart for {repo}') + plt.plot(release_tags[repo], time_diffs, marker="o", label=repo) + plt.axhline(y=72, color="r", linestyle="--", label="SLA (72 hours)") + plt.xlabel("Upstream Release Tag") + plt.ylabel( + "Time difference between upstream release and Helm Chart release (hours)" + ) + plt.title(f"Time to Release Helm Chart for {repo}") plt.legend() plt.grid(True) plt.xticks(rotation=45) plt.tight_layout() - plt.savefig(f'time_difference_plot_{repo}.png') + plt.savefig(f"time_difference_plot_{repo}.png") plt.close() print("The plots have been saved as 'time_difference_plot_.png'") + # Example usage if __name__ == "__main__": - plot_time_difference('merged_releases.csv') + plot_time_difference("merged_releases.csv")