# Read the two input tables from the local Downloads folder and preview the
# first rows of each one.
future <- read.csv(file = "/Users/admin/Downloads/future.csv")
head(x = future)
historical <- read.csv(file = "/Users/admin/Downloads/historical.csv")
head(x = historical)
# Q1. We explore the pairwise relationships between GDP and the climatic
# variables temperature and precipitation. GDP and temperature have a moderate
# negative correlation (r ≈ -0.39); GDP and precipitation have a weak negative
# correlation (r ≈ -0.15).
library(ggplot2)
# Scatterplot matrix of GDP against the two climate variables. pairs() is
# called only for its plotting side effect: it returns NULL, so the result is
# not assigned (assigning it would only create a useless global named `pairs`
# that shadows the name of the graphics function).
pairs(historical[, c("gdp", "temp", "prec")])

# GDP against temperature, one point per row, with a linear (lm) trend line.
ggplot(historical, aes(x = temp, y = gdp)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "GDP vs Temperature",
    x = "Temperature (°C)",
    y = "GDP (2023 USD)"
  )
## `geom_smooth()` using formula = 'y ~ x'
# GDP against precipitation, one point per row, with a linear (lm) trend line.
ggplot(data = historical, mapping = aes(x = prec, y = gdp)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("GDP vs Precipitation") +
  xlab("Precipitation (mm)") +
  ylab("GDP (2023 USD)")
## `geom_smooth()` using formula = 'y ~ x'
# Correlation matrix (cor() defaults) of GDP, temperature and precipitation,
# printed to the console.
cor_matrix <- cor(historical[c("gdp", "temp", "prec")])
print(cor_matrix)
## gdp temp prec
## gdp 1.0000000 -0.3868671 -0.1461041
## temp -0.3868671 1.0000000 0.2898795
## prec -0.1461041 0.2898795 1.0000000
# Q2. The GDP plot shows a pronounced upward trajectory, indicating a steady
# increase in GDP over the years. Temperature fluctuates substantially from
# year to year; particularly notable is a sustained dip from 1990 to
# approximately 1997. The precipitation plot initially shows a moderately
# descending slope.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
# Smoothed trend of GDP over the years. The colour is a fixed setting, so it
# is passed outside aes(): inside aes() the string "brown" is treated as a
# data value, which draws the line in the default palette colour and adds a
# spurious "brown" legend entry.
ggplot(historical, aes(x = year, y = gdp)) +
  geom_smooth(color = "brown") +
  labs(
    title = "Historical Changes in GDP",
    x = "Year", y = "GDP"
  )
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Smoothed trend of temperature over the years. The colour is a fixed
# setting, so it is passed outside aes(): inside aes() the string "brown" is
# treated as a data value, which draws the line in the default palette colour
# and adds a spurious "brown" legend entry.
ggplot(historical, aes(x = year, y = temp)) +
  geom_smooth(color = "brown") +
  labs(
    title = "Historical Changes in Temp",
    x = "Year", y = "Temp"
  )
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Smoothed trend of precipitation over the years. The colour is a fixed
# setting, so it is passed outside aes(): inside aes() the string "brown" is
# treated as a data value, which draws the line in the default palette colour
# and adds a spurious "brown" legend entry.
ggplot(historical, aes(x = year, y = prec)) +
  geom_smooth(color = "brown") +
  labs(
    title = "Historical Changes in Precipitation",
    x = "Year", y = "Precipitation"
  )
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Average temperature over all rows that share the same year.
mean_temp_per_year <- historical %>%
  group_by(year) %>%
  summarise(mean_temp = mean(temp))

# Line chart of the yearly average temperature.
ggplot(data = mean_temp_per_year, mapping = aes(x = year, y = mean_temp)) +
  geom_line(colour = "brown") +
  ggtitle("Historical Changes in Temperature") +
  xlab("Year") +
  ylab("Temperature")
# Average precipitation over all rows that share the same year.
mean_prec_per_year <- historical %>%
  group_by(year) %>%
  summarise(mean_prec = mean(prec))

# Line chart of the yearly average precipitation.
ggplot(data = mean_prec_per_year, mapping = aes(x = year, y = mean_prec)) +
  geom_line(colour = "brown") +
  ggtitle("Historical Changes in Precipitation") +
  xlab("Year") +
  ylab("Precipitation")
# Average GDP over all rows that share the same year.
mean_gdp_per_year <- historical %>%
  group_by(year) %>%
  summarise(mean_gdp = mean(gdp))

# Line chart of the yearly average GDP.
ggplot(data = mean_gdp_per_year, mapping = aes(x = year, y = mean_gdp)) +
  geom_line(colour = "brown") +
  ggtitle("Historical Changes in GDP") +
  xlab("Year") +
  ylab("GDP")
# Overlay GDP, temperature and precipitation over time on one set of axes.
# `historical` has multiple rows per year (it is averaged by year elsewhere
# in this script), so drawing the raw rows with geom_line() joins every
# observation within a year and produces a sawtooth. Each series is therefore
# reduced to its yearly mean with stat_summary() before being drawn as a line.
# Here the colour IS mapped inside aes() on purpose: the constant strings act
# as legend keys that scale_color_manual() turns into fixed colours.
# NOTE(review): the three series share one y-axis but are presumably on very
# different scales (GDP vs. temperature vs. precipitation), so the two climate
# lines may look flat — consider facets with free y scales.
ggplot(historical, aes(x = year)) +
  stat_summary(
    aes(y = gdp, color = "GDP"),
    fun = mean, geom = "line"
  ) +
  stat_summary(
    aes(y = temp, color = "Temperature"),
    fun = mean, geom = "line"
  ) +
  stat_summary(
    aes(y = prec, color = "Precipitation"),
    fun = mean, geom = "line"
  ) +
  labs(
    title = "Historical Changes in GDP, Temperature, and Precipitation",
    x = "Year", y = "Values"
  ) +
  scale_color_manual(
    values = c(
      "GDP" = "blue",
      "Temperature" = "red",
      "Precipitation" = "green"
    )
  )
# Q4. The model identifies temperature as the stronger predictor, with a
# statistically significant negative association with GDP (about -524 USD per
# °C, p < 2e-16). However, the low R-squared (about 0.15) suggests that other,
# unmodeled factors substantially influence GDP.
# Linear model of GDP on temperature and precipitation, followed by its
# coefficient and goodness-of-fit summary.
model <- lm(formula = gdp ~ temp + prec, data = historical)
summary(object = model)
##
## Call:
## lm(formula = gdp ~ temp + prec, data = historical)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23927 -4790 -2084 262 109028
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17201.6722 370.1105 46.477 < 2e-16 ***
## temp -523.7223 16.9945 -30.817 < 2e-16 ***
## prec -0.5379 0.1771 -3.038 0.00239 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10890 on 6223 degrees of freedom
## Multiple R-squared: 0.1509, Adjusted R-squared: 0.1507
## F-statistic: 553.1 on 2 and 6223 DF, p-value: < 2.2e-16
# Show the tidied model output, then store the model's GDP predictions for
# the rows of `future` in a new `predicted_gdp` column.
tidy(x = model)
future[["predicted_gdp"]] <- predict(object = model, newdata = future)
# Q5. The forecast, based on the projected climate data, shows the trajectory
# of predicted GDP for each country under the SSP1 and SSP5 scenarios,
# representing minimum and maximum impact, respectively. In many countries,
# predicted GDP declines over the next 80 years.
library(ggplot2)
# Predicted GDP over time, coloured by scenario, with one panel per country.
ggplot(
  data = future,
  mapping = aes(x = year, y = predicted_gdp, colour = scenario)
) +
  geom_line() +
  facet_wrap(~ country) +
  ggtitle("Predicted GDP under Future Scenarios") +
  xlab("Year") +
  ylab("Predicted GDP")