argocd-helm/scripts/release-analysis/merge_csvs.py

import csv
from datetime import datetime


def merge_csv_files(csv_file_1, csv_file_2, output_csv_file):
    """Merge upstream Argo release data with the oldest matching Helm chart release
    for each app version, recording the time difference between the two."""
    # Read the first CSV file into a dictionary keyed by (repository, release tag)
    releases_1 = {}
    with open(csv_file_1, mode="r") as file:
        reader = csv.DictReader(file)
        for row in reader:
            key = (row["Repository"], row["Release Tag"])
            releases_1[key] = row

    # Read the second CSV file and find the oldest release for each appVersion
    oldest_releases = {}
    valid_repos = {"argo-cd", "argo-events", "argo-workflows", "argo-rollouts"}
    with open(csv_file_2, mode="r") as file:
        reader = csv.DictReader(file)
        for row in reader:
            release_name = row["Release Name"]
            # Chart release names look like "<repo>-<version>"; strip the trailing
            # version segment to recover the repository name.
            repo_name = "-".join(release_name.split("-")[:-1])
            if repo_name not in valid_repos:
                continue
            app_version = row["App Version"]
            release_datetime = datetime.strptime(
                f"{row['Release Date']} {row['Release Time']}", "%Y-%m-%d %H:%M:%S"
            )
            key = (repo_name, app_version)
            if (
                key not in oldest_releases
                or release_datetime < oldest_releases[key]["datetime"]
            ):
                oldest_releases[key] = {"row": row, "datetime": release_datetime}

    # Merge the oldest releases with the first CSV file
    merged_releases = []
    for (repo_name, app_version), data in oldest_releases.items():
        row = data["row"]
        release = releases_1.get((repo_name, app_version))
        if release is not None:
            time_difference = data["datetime"] - datetime.strptime(
                f"{release['Release Date']} {release['Release Time']}",
                "%Y-%m-%d %H:%M:%S",
            )
            # Convert the upstream-to-chart delay to hours
            time_difference_hours = time_difference.total_seconds() / 3600
            merged_releases.append(
                {
                    "Repository": release["Repository"],
                    "Release Tag": release["Release Tag"],
                    "Release Date": release["Release Date"],
                    "Release Time": release["Release Time"],
                    "App Version": app_version,
                    "Release Name": row["Release Name"],
                    "Release Date 2": row["Release Date"],
                    "Release Time 2": row["Release Time"],
                    "Time Difference": time_difference_hours,
                }
            )
        else:
            # No matching upstream release; keep the chart release with blank fields
            merged_releases.append(
                {
                    "Repository": repo_name,
                    "Release Tag": "",
                    "Release Date": "",
                    "Release Time": "",
                    "App Version": app_version,
                    "Release Name": row["Release Name"],
                    "Release Date 2": row["Release Date"],
                    "Release Time 2": row["Release Time"],
                    "Time Difference": "",
                }
            )

    # Write the merged data to a new CSV file
    fieldnames = [
        "Repository",
        "Release Tag",
        "Release Date",
        "Release Time",
        "App Version",
        "Release Name",
        "Release Date 2",
        "Release Time 2",
        "Time Difference",
    ]
    with open(output_csv_file, mode="w", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(merged_releases)

    print(f"Merged data has been written to {output_csv_file}")


# Example usage
if __name__ == "__main__":
    merge_csv_files(
        "argo_releases.csv", "argo_helm_releases.csv", "merged_releases.csv"
    )
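
# The scripts that produce the input CSVs are not shown here, so the exact layout is
# an assumption inferred from the columns this script reads: "argo_releases.csv" is
# expected to provide Repository, Release Tag, Release Date and Release Time, and
# "argo_helm_releases.csv" is expected to provide Release Name, App Version,
# Release Date and Release Time, with dates as YYYY-MM-DD and times as HH:MM:SS.
# Illustrative rows only (not real data):
#
#   Repository,Release Tag,Release Date,Release Time
#   argo-cd,v2.9.3,2023-12-13,17:45:12
#
#   Release Name,App Version,Release Date,Release Time
#   argo-cd-5.52.0,v2.9.3,2023-12-14,09:02:41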