import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

# Load the crime records from the CSV export
df = pd.read_csv("crimedata_csv_all_years.csv")

# Assemble a single datetime column from the separate YEAR / MONTH / DAY columns
df['DATE'] = pd.to_datetime(df.loc[:, ['YEAR', 'MONTH', 'DAY']])

# Derive the day of the week from DATE (pandas numbers Monday as 0, Sunday as 6)
df['WEEKDAY'] = df['DATE'].dt.weekday
print(df.head())

Get all the unique neighbourhoods in Vancouver City

# Collect every distinct NEIGHBOURHOOD value (missing entries show up as nan)
neighbourhood_names = df['NEIGHBOURHOOD'].unique()
print(neighbourhood_names)
['Oakridge' 'Fairview' 'West End' 'Central Business District'
 'Hastings-Sunrise' 'Strathcona' 'Grandview-Woodland' 'Kitsilano'
 'Kensington-Cedar Cottage' 'Sunset' 'Mount Pleasant' 'Stanley Park'
 'Shaughnessy' 'Marpole' 'West Point Grey' 'Victoria-Fraserview'
 'Kerrisdale' 'Riley Park' 'Arbutus Ridge' 'Renfrew-Collingwood'
 'Killarney' 'Dunbar-Southlands' 'South Cambie' 'Musqueam' nan]
# Tally how many records fall into each HOUR value
count_df = df['HOUR'].value_counts()
# Not used by the plot below; kept so the name stays defined. A plain slice
# replaces the tuple-style `[:,]` index, which newer pandas may reject on a
# single-level index -- TODO confirm against the installed version.
city_count = count_df[:]
plt.figure(figsize=(12,5))
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally, while keywords work on older releases as well.
sns.barplot(x=count_df.index, y=count_df.values, alpha=1)

plt.title('Crime occurence at different time of the day')
plt.ylabel('Number of Occurrences', fontsize=12)
plt.xlabel('Hour', fontsize=12)
plt.show()
# For every column, print how many entries are missing (True) vs present (False)
for _column_name, column_values in df.items():
    missing_flags = column_values.isna()
    print(missing_flags.value_counts())
False    619574
Name: TYPE, dtype: int64
False    619574
Name: YEAR, dtype: int64
False    619574
Name: MONTH, dtype: int64
False    619574
Name: DAY, dtype: int64
False    619574
Name: HOUR, dtype: int64
False    619574
Name: MINUTE, dtype: int64
False    619561
True         13
Name: HUNDRED_BLOCK, dtype: int64
False    555000
True      64574
Name: NEIGHBOURHOOD, dtype: int64
False    619455
True        119
Name: X, dtype: int64
False    619455
True        119
Name: Y, dtype: int64
False    619574
Name: DATE, dtype: int64
False    619574
Name: WEEKDAY, dtype: int64

Frequency of each crime type

# Group the records by year (the grouped object is not used by the print below)
new_df = df.groupby(by='YEAR')
# Show how many records exist for each crime TYPE, most frequent first
type_frequencies = df['TYPE'].value_counts()
print(type_frequencies)
Theft from Vehicle                                        207712
Mischief                                                   83355
Break and Enter Residential/Other                          66065
Other Theft                                                64087
Offence Against a Person                                   61706
Theft of Vehicle                                           41394
Break and Enter Commercial                                 38645
Theft of Bicycle                                           30887
Vehicle Collision or Pedestrian Struck (with Injury)       25183
Vehicle Collision or Pedestrian Struck (with Fatality)       289
Homicide                                                     251
Name: TYPE, dtype: int64
# Bar chart of the number of records per crime TYPE. The counts come from the
# TYPE column so the bars agree with the chart title and the 'category' axis
# label; counting NEIGHBOURHOOD here would chart neighbourhoods under a
# crime-type title.
nameplot = df['TYPE'].value_counts().plot.bar(title='Count of each type of crime happened in Vancouver', figsize=(12,6))
nameplot.set_xlabel('category',size=20)
nameplot.set_ylabel('crime count',size=20)
Text(0, 0.5, 'crime count')

Number of crimes reported on each weekday

# Tally how many records fall on each WEEKDAY value
count_df = df['WEEKDAY'].value_counts()
# Not used by the plot below; kept so the name stays defined. A plain slice
# replaces the tuple-style `[:,]` index, which newer pandas may reject on a
# single-level index -- TODO confirm against the installed version.
city_count = count_df[:]
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally, while keywords work on older releases as well.
sns.barplot(x=count_df.index, y=count_df.values, alpha=1)
plt.title('Crime occurence on different days of the week')
plt.ylabel('Number of Occurrences', fontsize=12)
plt.xlabel('Day of the week', fontsize=12)
plt.show()
# year labels
year_labels = sorted(df["YEAR"].unique())

# crime types
crime_types = sorted(df["TYPE"].unique().tolist())

# Count records for every (year, type) pair in one pass. Reindexing against the
# full list of years and types fills combinations that never occur with 0, so
# every column has exactly one value per year. Assigning the raw per-type
# counts directly would fail (or misalign) for a type that is absent in a year.
type_counts_by_year = pd.crosstab(df["YEAR"], df["TYPE"]).reindex(
    index=year_labels, columns=crime_types, fill_value=0
)

# Wide table: one row per year, a "year" column, then one count column per type
crime_count_by_year = pd.DataFrame({"year": year_labels})
for current_type in crime_types:
    # to_numpy() drops the YEAR index so values are placed by position,
    # matching the row order of year_labels.
    crime_count_by_year[current_type] = type_counts_by_year[current_type].to_numpy()
crime_count_by_year

# Alphabetically ordered list of the crime types present in the data
crime_types = sorted(df["TYPE"].unique().tolist())

# One line-and-marker trace per crime type, read from the matching column of
# crime_count_by_year and plotted against year_labels
fig = go.Figure()
for type_name in crime_types:
    yearly_counts = crime_count_by_year[type_name]
    trace = go.Scatter(
        x=year_labels,
        y=yearly_counts,
        mode='lines+markers',
        name=type_name,
    )
    fig.add_trace(trace)

# NOTE(review): the y-axis title reads 'Absolute Change', but the plotted
# values are the per-year columns of crime_count_by_year -- confirm the label.
layout_options = dict(
    title='Crimes Over the Years in Vancouver by Type',
    xaxis_title='Year',
    yaxis_title='Absolute Change',
    autosize=True,
    height=570,
)
fig.update_layout(**layout_options)

# Lay the legend out horizontally
fig.update_layout(legend_orientation="h")

fig.show()
# Crime types ordered from most to least frequent overall
df2 = df['TYPE'].value_counts()
crime_types = df2.index
# One group of records per crime type (the earlier groupby on 'YEAR' was
# overwritten before being used, so it is dropped)
new_df = df.groupby('TYPE')

# Draw a separate bar chart of yearly record counts for every crime type
for value in crime_types:
    df1 = new_df.get_group(value)
    count_df = df1['YEAR'].value_counts()
    plt.figure(figsize=(8.5,5))
    # x and y are passed by keyword: seaborn 0.12+ no longer accepts them
    # positionally, while keywords work on older releases as well.
    sns.barplot(x=count_df.index, y=count_df.values, alpha=1)
    plt.title(value)
    plt.ylabel('Number of Occurrences', fontsize=12)
    plt.xlabel('Year', fontsize=12)
    plt.show()
    
Index(['Theft from Vehicle', 'Mischief', 'Break and Enter Residential/Other',
       'Other Theft', 'Offence Against a Person', 'Theft of Vehicle',
       'Break and Enter Commercial', 'Theft of Bicycle',
       'Vehicle Collision or Pedestrian Struck (with Injury)',
       'Vehicle Collision or Pedestrian Struck (with Fatality)', 'Homicide'],
      dtype='object')