library(usmap)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RColorBrewer)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(maps)
library(plotrix)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ purrr::map()     masks maps::map()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(timetk)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)


USADatafinal <- read.csv("/Users/pranavdharmadhikari/Downloads/UFO_time (1).csv")
head(USADatafinal)
##          Date_time date_documented Year Month Hour Season Country    Region
## 1 10/10/1949 20:30       4/27/2004 1949    10   20 Autumn     USA     Texas
## 2 10/10/1949 21:00      12/16/2005 1949    10   21 Autumn     USA     Texas
## 3 10/10/1955 17:00       1/21/2008 1955    10   17 Autumn     GBR   England
## 4 10/10/1956 21:00       1/17/2004 1956    10   21 Autumn     USA     Texas
## 5 10/10/1960 20:00       1/22/2004 1960    10   20 Autumn     USA    Hawaii
## 6 10/10/1961 19:00       4/27/2007 1961    10   19 Autumn     USA Tennessee
##         Locale latitude   longitude    shape length
## 1   San Marcos 29.88306  -97.941111 Cylinder   2700
## 2 Bexar County 29.38421  -98.581082    Light   7200
## 3      Chester 53.20000   -2.916667   Circle     20
## 4         Edna 28.97833  -96.645833   Circle     20
## 5      Kaneohe 21.41806 -157.803611    Light    900
## 6      Bristol 36.59500  -82.188889   Sphere    300
##                                                                                                                                                  Description
## 1                    This event took place in early fall around 1949-50. It occurred after a Boy Scout meeting in the Baptist Church. The Baptist Church sit
## 2                                                            1949 Lackland AFB&#44 TX.  Lights racing across the sky &amp; making 90 degree turns on a dime.
## 3                                                                                                        Green/Orange circular disc over Chester&#44 England
## 4                 My older brother and twin sister were leaving the only Edna theater at about 9 PM&#44...we had our bikes and I took a different route home
## 5 AS a Marine 1st Lt. flying an FJ4B fighter/attack aircraft on a solo night exercise&#44 I was at 50&#44000&#39 in a &quot;clean&quot; aircraft (no ordinan
## 6                 My father is now 89 my brother 52 the girl with us now 51 myself 49 and the other fellow which worked with my father if he&#39s still livi
USADatafinal$Date_time = as.Date(USADatafinal$Date_time, "%m/%d/%Y %H:%M")
USADatafinal = USADatafinal[order(USADatafinal$Date_time),]
str(USADatafinal$Date_time)
##  Date[1:80328], format: "1906-11-11" "1910-01-01" "1910-06-01" "1916-04-05" "1920-06-11" ...
USAData <- USADatafinal %>%
  filter(Country == 'USA')
head(USAData)
##    Date_time date_documented Year Month Hour Season Country   Region
## 1 1910-01-01       9/15/2005 1910     1    0 Winter     USA Missouri
## 2 1910-06-01       4/16/2005 1910     6   15 Summer     USA    Texas
## 3 1920-06-11       5/12/2009 1920     6   21 Summer     USA  Indiana
## 4 1925-12-28       5/11/2005 1925    12   18 Winter     USA Illinois
## 5 1929-07-05       8/16/2002 1929     7   14 Summer     USA   Oregon
## 6 1930-06-01       1/19/2005 1930     6   22 Summer     USA New York
##        Locale latitude  longitude    shape length
## 1  Kirksville 40.19472  -92.58306     Disk    120
## 2 Wills Point 32.70917  -96.00806    Cigar    120
## 3      Cicero 40.12389  -86.01333  Unknown     60
## 4    Atkinson 41.42083  -90.01500     Disk     60
## 5    Buchanan 43.64250 -118.62750     Disk     60
## 6   Freeville 42.50917  -76.39380 Triangle   1200
##                                                                                                                                     Description
## 1                                                                                           Historical sighting (1903 - 1913) Northern Missouri
## 2                                                                                                  Cigar shaped object moving from West to East
## 3                                                    ((NUFORC Note:  Probable hoax.  Note date.  PD))  Strange gathering of objects in the sky.
## 4                                                                     Young boy witnesses disc in sky above Illinois farmland in December 1925.
## 5 we were traveling east of burns&#44clmbing up thru a cut in the rim rocks when this object very slowely flew over the top of us&#44 some 50 f
## 6                                                                                                            Very Large Triangle Shapped Object
USADatafinal$Date_time <- as.POSIXct(USADatafinal$Date_time, format = "%Y-%m-%d %H:%M:%S")
ggplot(USAData, aes(x = Year, Y = Country)) +
  geom_bar(stat = "count") +
  labs(title = "UFO Sightings by Year in the USA",
       x = "Year",
       y = "Number of Sightings") +
  theme_minimal()+ scale_x_continuous(limits = c(1900, 2025), breaks = seq(1900, 2025, by = 10))

#1 Geographic Distribution
#Interactive section wont knit, so I have added the static version.
Geogdist <- ggplot() +
geom_polygon(data = map_data("state"), aes(x = long, y = lat, group = group), fill = "white", color = "black") +
geom_point(data = USAData, aes(x = longitude, y = latitude, color = as.factor(Year), size = 0.2)) +
coord_fixed(1.3, xlim = c(-125, -66), ylim = c(24, 49)) + 
scale_color_viridis_d(option = "magma") +
scale_size(range = c(0.1, 1)) +
theme_void() +
labs(title = "UFO Sightings in the USA over the years")
print(Geogdist)

usa_mapinteractive <- ggplotly(Geogdist, tooltip = "Year")
print(usa_mapinteractive)
#2 Distribution by Shape
#Interactive section wont knit, so I have added the static version.
states <- map_data("state")
DistbyShape <- ggplot() +
geom_polygon(data = states, aes(x = long, y = lat, group = group), fill = "white", color = "black") +
geom_point(data = USAData, aes(x = longitude, y = latitude, color = shape, size = 0.2)) +
coord_fixed(1.3, xlim = c(-125, -66), ylim = c(24, 49)) + 
scale_color_viridis_d(option = "magma") +
scale_size(range = c(0.1, 1)) +
theme_void() +
labs(title = "UFO Sightings in the USA by shape")
print(DistbyShape)

usa_Shapeinteractive <- ggplotly(DistbyShape, tooltip = "Shape")
print(usa_Shapeinteractive)
# Distribution by hour
#Interactive section wont knit, so I have added the static version.
c25 <- c(
  "dodgerblue2", "#E31A1C",
  "green4",
  "#6A3D9A", 
  "#FF7F00", 
  "black", "gold1",
  "skyblue2", "#FB9A99", 
  "palegreen2",
  "#CAB2D6",
  "#FDBF6F", 
  "gray70", "khaki2",
  "maroon", "orchid1", "deeppink1", "blue1", "steelblue4",
  "darkturquoise", "green1", "yellow4", "yellow3",
  "darkorange4", "brown"
)
USADistbyHour <- ggplot() +
geom_polygon(data = states, aes(x = long, y = lat, group = group), fill = "white", color = "black") +
geom_point(data = USAData, aes(x = longitude, y = latitude, color = factor(Hour), size = 0.2)) +
coord_fixed(1.3, xlim = c(-125, -66), ylim = c(24, 49)) + 
scale_color_manual(values = c25) +
scale_size(range = c(0.1, 1)) +
theme_void() +
labs(title = "UFO Sightings in the USA")
print(USADistbyHour)

USADistbyHour_interactive <- ggplotly(USADistbyHour, tooltip = "Hour")
print(USADistbyHour_interactive)
# Distribution by duration.

ggplot(USAData, aes(x = Year, y = length)) +
  geom_bar(stat = "identity", fill = "red") +
  labs(title = "UFO sightings distribution by duration",
       x = "Time", y = "lenght") +
  theme_minimal()

#3 Time Series analysis

USAData$dateonly <- as.Date(USAData$Date_time)
timeseries <- USAData %>%
  group_by(dateonly) %>%
  summarise(Sightings = n())
timeseriesdata <- ts(timeseries$Sightings, frequency = 365)
decomposed <- stl(timeseriesdata, s.window = "periodic")
plot(decomposed, main = "Decomposition of UAP Sightings Time Series")

ggplot(timeseries, aes(x = dateonly, y = Sightings)) +
  geom_line(color = "black", size = 1) +  
  geom_smooth(method = "loess", color = "white", fill = "darkgrey", alpha = 0.3) +
  labs(title = "UAP Sightings Over Time",
       x = "Date",
       y = "Number of Sightings",
       caption = "Smoothed curve represents the trend") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(size = 20, face = "bold", hjust = 0.5),  
        plot.caption = element_text(size = 10, hjust = 0.5),
        legend.position = "none")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'

#4 Arima Model fitting

USAData$Date_time <- as.POSIXct(USAData$Date_time, format = "%m/%d/%Y %H:%M", tz = "UTC")
USAData$dateonly <- as.Date(USAData$Date_time)
timeseries <- USAData %>%
  count(dateonly) %>%
  rename(Sightings = n)
timeseriesdata <- ts(timeseries$Sightings, frequency = 365)
arima_model <- auto.arima(timeseriesdata)
summary(arima_model)
## Series: timeseriesdata 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.1652  -0.9597
## s.e.  0.0105   0.0032
## 
## sigma^2 = 39.42:  log likelihood = -33006.63
## AIC=66019.25   AICc=66019.25   BIC=66040.92
## 
## Training set error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.02949618 6.277408 3.116939 -48.11558 70.51726 0.7356512
##                      ACF1
## Training set 0.0006667372
forecast_values <- forecast(arima_model, h = 365)
plot(forecast_values, main = "Forecast of UAP Sightings using ARIMA",
     xlab = "Year", ylab = "Number of Sightings")