Hello! Glad to see you here. This is a collection of my personal projects made during free time.
Some possible questions that could be answered using the criminal data set:
#1.- TopCrimeDarPerYear
WITH ranked_days AS (
SELECT c.year,
c.month,
c.day,
COUNT(c.day) AS count_per_day,
ROW_NUMBER() OVER
(PARTITION BY c.year ORDER BY COUNT(c.day) DESC ) AS row_num
FROM crime AS c
GROUP BY c.year,c.month,c.day
)
SELECT year,month,day,count_per_day
FROM ranked_days
WHERE row_num IN (1)
LIMIT 20;

WITH ranked_days AS (
SELECT c.year,
c.month,
c.day,
COUNT(c.day) AS count_per_day,
ROW_NUMBER() OVER
(PARTITION BY c.year ORDER BY COUNT(c.day) ASC ) AS row_num
FROM crime AS c
GROUP BY c.year,c.month,c.day
)
SELECT year,month,day,count_per_day
FROM ranked_days
WHERE row_num IN (1)
SELECT year, COUNT(crime_type) as crimes_per_year
FROM crime
GROUP BY year
ORDER BY crimes_per_year DESC
LIMIT 20;
Despite 20111 being the year with the highest crimes in a day ,we can see the downward trend in amount of crimes every year.
WITH RankedNeighborhoods AS (
SELECT
c.year,
l.neighbourhood,
COUNT(l.neighbourhood) as count_crimes,
ROW_NUMBER() OVER (PARTITION BY c.year ORDER BY COUNT(l.neighbourhood) DESC) as row_num
FROM crime AS c
LEFT JOIN location AS l ON c.location_location_id = l.location_id
GROUP BY c.year, l.neighbourhood
)
SELECT year, neighbourhood, count_crimes
FROM RankedNeighborhoods
WHERE row_num = 1; # For the least crimes we just modify this for 22 (last row)
SELECT
subquery.crime_type,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Arbutus Ridge' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Arbutus_Ridge,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Downtown' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Downtown,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Dunbar-Southlands' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Dunbar_Southlands,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Fairview' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Fairview,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Grandview-Woodland' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Grandview_Woodland,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Hastings-Sunrise' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Hastings_Sunrise,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Kensington-Cedar Cottage' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Kensington_Cedar_Cottage,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Kerrisdale' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Kerrisdale,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Killarney' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Killarney,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Kitsilano' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Kitsilano,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Marpole' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Marpole,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Mount Pleasant' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Mount_Pleasant,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Musqueam' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Musqueam,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Oakridge' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Oakridge,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Renfrew-Collingwood' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Renfrew_Collingwood,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Riley Park' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Riley_Park,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Shaughnessy' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Shaughnessy,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'South Cambie' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS South_Cambie,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Stanley Park' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Stanley_Park,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Strathcona' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Strathcona,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Sunset' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Sunset,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'Victoria-Fraserview' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS Victoria_Fraserview,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'West End' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS West_End,
CONCAT(ROUND(SUM(CASE WHEN subquery.neighbourhood = 'West Point Grey' THEN (count_per_neighborhood / total_count) * 100 END), 2), '%') AS West_Point_Grey
FROM (
SELECT
C.crime_type,
L.neighbourhood,
COUNT(*) AS count_per_neighborhood,
SUM(COUNT(*)) OVER (PARTITION BY L.neighbourhood) AS total_count
FROM
Crime C
JOIN
Location L ON C.location_location_id = L.location_id
GROUP BY
C.crime_type, L.neighbourhood
) AS subquery
GROUP BY
subquery.crime_type;



SELECT l.neighbourhood, l.hundred_block, COUNT(l.hundred_block) AS count_street
FROM (
SELECT * FROM location
WHERE neighbourhood LIKE "%Downtown%"
) AS c
LEFT JOIN location AS l ON c.location_id = l.location_id
GROUP BY l.neighbourhood, l.hundred_block
ORDER BY count_street DESC
LIMIT 10;
OR
DELIMITER //
CREATE PROCEDURE GetCrimeDataByNeighbourhood(IN neighbourhood_param VARCHAR(255))
BEGIN
SET @neighbourhood_filter = CONCAT('%', neighbourhood_param, '%');
SET @query = '
WITH crimes_neighbourhood AS (
SELECT *
FROM crime as c
JOIN location as l ON c.location_location_id = l.location_id
WHERE l.neighbourhood LIKE ?
)
SELECT hour, hundred_block, COUNT(*) AS conteo_street
FROM crimes_neighbourhood
GROUP BY hour, hundred_block
ORDER BY conteo_calle DESC
LIMIT 50;
';
PREPARE stmt FROM @query;
EXECUTE stmt USING @neighbourhood_filter;
DEALLOCATE PREPARE stmt;
END //
DELIMITER ;



WITH top_crimes_per_neighbourhood AS (
SELECT
l.neighbourhood,
c.crime_type,
COUNT(c.crime_type) as count_crimes,
ROW_NUMBER() OVER
(PARTITION BY l.neighbourhood ORDER BY COUNT(c.crime_type) DESC ) AS ROW_NUM
FROM crime AS c
LEFT JOIN location AS l ON c.location_location_id=l.location_id
GROUP BY l.neighbourhood,c.crime_type
)
SELECT neighbourhood,crime_type,count_crimes
FROM top_crimes_per_neighbourhood
WHERE ROW_NUM IN (1,2,3);


WITH rank_hour_downtown AS(
SELECT c.year,
c.hour,
l.neighbourhood,
COUNT(c.crime_type) AS count_crimes,
ROW_NUMBER() OVER (PARTITION BY c.year,l.neighbourhood ORDER BY COUNT(c.crime_type) DESC) AS row_num
FROM crime AS c
LEFT JOIN location AS l ON c.location_location_id=l.location_id
WHERE l.neighbourhood LIKE "%Downtown%"
GROUP BY c.year,c.hour,l.neighbourhood
)
SELECT * FROM rank_hour_downtown
WHERE row_num IN (1,2,3);



WITH ranked_months AS (
SELECT c.year,
c.month,
COUNT(c.crime_type) AS count_crime_type,
ROW_NUMBER() OVER
(PARTITION BY c.year ORDER BY COUNT(c.crime_type) ASC ) AS row_num
FROM crime AS c
GROUP BY c.year,c.month
)
SELECT year,month,count_crime_type
FROM ranked_months
WHERE row_num IN (1,2,3)
WITH RankMonth AS (
SELECT
c.crime_type,
l.neighbourhood,
l.hundred_block,
COUNT(c.crime_type) AS count_crimes,
ROW_NUMBER() OVER (PARTITION BY l.neighbourhood ORDER BY COUNT(c.crime_type) ASC) AS row_num
FROM crime AS c
LEFT JOIN location AS l ON c.location_location_id = l.location_id
WHERE l.hundred_block NOT LIKE "%OFFSET TO PROTECT PRIVACY%"
GROUP BY c.crime_type, l.neighbourhood, l.hundred_block
)
SELECT
neighbourhood,
hundred_block,
count_crimes
FROM RankMonth
WHERE row_num IN (1, 2)

