# Third-party libraries used by every step of this script. Importing them
# here is harmless if they were already imported earlier in the file.
import numpy as np
import pandas as pd

# Not part of the original exercise: build a small in-memory sample dataset
# so the script can always be run without an external data file.
data = {
    "Country": ["Netherlands", "Belgium", "France", "Germany", "Spain", "USA", "China", "Sweden"],
    "Continent": ["Europe", "Europe", "Europe", "Europe", "Europe", "North America", "Asia", "Europe"],
    # One value is deliberately missing (Germany).
    "renewable_energy": [50, 30, 40, np.nan, 25, 80, 100, 60],
    # One value is deliberately missing (Spain).
    "total_energy": [120, 100, 150, 200, np.nan, 300, 500, 90],
}
# Wrap the sample in a pandas DataFrame so the steps below can operate on it.
df = pd.DataFrame(data)
# ---------------------------------------------------------
# 2. Inspect the data
#    Print a preview (up to 10 rows), the dtype of each
#    column, and the describe() summary statistics.
# ---------------------------------------------------------
# Each heading and its table go through a single print call; sep="\n"
# puts the table on the line after the heading.
print("First 10 rows of the dataset:", df.head(10), sep="\n")
print("Column types:", df.dtypes, sep="\n")
print("Summary statistics (describe):", df.describe(), sep="\n")
# ---------------------------------------------------------
# 3. Subset the data to European countries
# ---------------------------------------------------------
# Keep the rows whose Continent equals "Europe". The .copy() makes EU_data
# an independent DataFrame, so assigning new columns to it leaves df as is.
EU_data = df.loc[df["Continent"].eq("Europe")].copy()
print("Data for European countries (EU_data):", EU_data, sep="\n")
# ---------------------------------------------------------
# 4. Compute renewable energy percentage
#    renewable_percentage = (renewable_energy / total_energy) * 100
#    A row with a missing value in either column gets NaN.
# ---------------------------------------------------------
EU_data["renewable_percentage"] = (
    EU_data["renewable_energy"].div(EU_data["total_energy"]).mul(100)
)
print("EU_data with renewable_percentage:", EU_data, sep="\n")
# ---------------------------------------------------------
# 5. Classify countries by renewable_percentage
#    > 50%    -> "High Renewable"
#    20–50%   -> "Medium Renewable"
#    < 20%    -> "Low Renewable"
#    missing  -> None (no category can be assigned)
# ---------------------------------------------------------
def classify_renewable(pct):
    """Return the renewable-energy category label for a percentage.

    Args:
        pct: Renewable share of total energy, in percent.

    Returns:
        "High Renewable" when pct is above 50, "Medium Renewable" when it is
        between 20 and 50 inclusive, "Low Renewable" when it is below 20,
        and None when pct is NaN (the percentage could not be computed).
    """
    import math  # local import keeps the function self-contained

    # Every comparison with NaN is False, so without this guard a missing
    # percentage would fall through to the last branch and be mislabelled
    # as "Low Renewable".
    if math.isnan(pct):
        return None
    if pct > 50:
        return "High Renewable"
    if 20 <= pct <= 50:
        return "Medium Renewable"
    return "Low Renewable"
# Label each row by passing its renewable_percentage through
# classify_renewable, one value at a time.
EU_data["renewable_category"] = EU_data["renewable_percentage"].map(classify_renewable)
print("EU_data with renewable_category:", EU_data, sep="\n")
# ---------------------------------------------------------
# 6. Using if/else to check for missing values
#    in renewable_energy or total_energy
# ---------------------------------------------------------
# True for every row where at least one of the two energy columns is NaN.
missing_mask = EU_data[["renewable_energy", "total_energy"]].isna().any(axis=1)
# Summing a boolean mask counts its True entries.
n_missing = missing_mask.sum()
if n_missing == 0:
    print("There are no missing values in renewable_energy or total_energy.")
else:
    print("There are", n_missing, "rows with missing values in renewable_energy or total_energy.")
    # Optional: show the incomplete rows
    print("Rows with missing values:")
    print(EU_data[missing_mask])
# ---------------------------------------------------------
# 7. Group and summarize data
#    One row per renewable_category, holding the mean
#    renewable_percentage of the countries in that category.
# ---------------------------------------------------------
summary = EU_data.groupby("renewable_category")["renewable_percentage"].mean()
# reset_index() turns the category labels from the index back into a regular
# column, giving a two-column DataFrame.
summary = summary.reset_index()
print("Average renewable_percentage by renewable_category:", summary, sep="\n")