Hello! Glad to see you here. This is a collection of personal projects I made in my free time.
import geopandas as gpd
import numpy as np
import pandas as pd
gdf=gpd.read_file('C:/PATH.geojson')
gdf.shape
(100271, 10)
gdf.columns
Index(['hash', 'number', 'street', 'unit', 'city', 'district', 'region',
'postcode', 'id', 'geometry'],
dtype='object')
gdf.tail()
| hash | number | street | unit | city | district | region | postcode | id | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|
| 100266 | 221432fdd9c5da83 | POINT (-123.14031 49.27329) | ||||||||
| 100267 | 2d3addd97703be23 | 304 | E 28TH AV | POINT (-123.09914 49.24554) | ||||||
| 100268 | ffd277f41f8d9fc2 | 4405 | SOPHIA ST | POINT (-123.09893 49.24553) | ||||||
| 100269 | 6e71eff7bfc20b92 | 502 | E 7TH AV | POINT (-123.09276 49.26409) | ||||||
| 100270 | cc252032cc002b5c | 4833 | SLOCAN ST | POINT (-123.05335 49.24118) |
# Keep only the columns needed for the address table. Take an explicit copy:
# adding a column to a slice of `gdf` raises pandas' SettingWithCopyWarning.
df = gdf[['number', 'street', 'geometry']].copy()
# Join house number and street name into one string, e.g. "4331 BLENHEIM ST".
df['address'] = df['number'].str.cat(df['street'], sep=' ')
C:\Users\OWNER\PycharmProjects\geopandas\venv\lib\site-packages\geopandas\geodataframe.py:1351: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super().__setitem__(key, value)
df
| number | street | geometry | address | |
|---|---|---|---|---|
| 0 | 4331 | BLENHEIM ST | POINT (-123.17900 49.24767) | 4331 BLENHEIM ST |
| 1 | 4319 | BLENHEIM ST | POINT (-123.17900 49.24776) | 4319 BLENHEIM ST |
| 2 | 4307 | BLENHEIM ST | POINT (-123.17900 49.24787) | 4307 BLENHEIM ST |
| 3 | 3320 | W 27TH AV | POINT (-123.17942 49.24775) | 3320 W 27TH AV |
| 4 | 3332 | W 27TH AV | POINT (-123.17959 49.24776) | 3332 W 27TH AV |
| ... | ... | ... | ... | ... |
| 100266 | POINT (-123.14031 49.27329) | |||
| 100267 | 304 | E 28TH AV | POINT (-123.09914 49.24554) | 304 E 28TH AV |
| 100268 | 4405 | SOPHIA ST | POINT (-123.09893 49.24553) | 4405 SOPHIA ST |
| 100269 | 502 | E 7TH AV | POINT (-123.09276 49.26409) | 502 E 7TH AV |
| 100270 | 4833 | SLOCAN ST | POINT (-123.05335 49.24118) | 4833 SLOCAN ST |
100271 rows × 4 columns
df
| geometry | address | |
|---|---|---|
| 0 | POINT (-123.17900 49.24767) | 4331 BLENHEIM ST |
| 1 | POINT (-123.17900 49.24776) | 4319 BLENHEIM ST |
| 2 | POINT (-123.17900 49.24787) | 4307 BLENHEIM ST |
| 3 | POINT (-123.17942 49.24775) | 3320 W 27TH AV |
| 4 | POINT (-123.17959 49.24776) | 3332 W 27TH AV |
| ... | ... | ... |
| 100266 | POINT (-123.14031 49.27329) | |
| 100267 | POINT (-123.09914 49.24554) | 304 E 28TH AV |
| 100268 | POINT (-123.09893 49.24553) | 4405 SOPHIA ST |
| 100269 | POINT (-123.09276 49.26409) | 502 E 7TH AV |
| 100270 | POINT (-123.05335 49.24118) | 4833 SLOCAN ST |
100271 rows × 2 columns
df=df[['address','geometry']]
df.head(1)
| address | geometry | |
|---|---|---|
| 0 | 4331 BLENHEIM ST | POINT (-123.17900 49.24767) |
from faker import Faker
import numpy as np
fake = Faker()
# Work with a random 10,000-row subset of the address table.
df2 = df.sample(10000)
# Inspect the sample's dimensions. The previous `df2['']` looked up a column
# named '' that does not exist and raised KeyError.
df2.shape
(10000, 2)
fake.email()
'angelawu@example.net'
class CustomProvider:
    """Provider that fabricates Gmail-style e-mail addresses.

    The object passed to the constructor is stored as ``self.faker`` and is
    only used through its ``user_name()``, ``first_name()`` and
    ``last_name()`` methods.
    """

    def __init__(self, faker):
        self.faker = faker

    def gmail_email(self):
        """Return ``"<user_name>@gmail.com"`` for a freshly generated user name."""
        return f"{self.faker.user_name()}@gmail.com"

    def first_name_last_name_email(self):
        """Return ``"<First><Last><number>@gmail.com"``.

        The numeric suffix comes from ``np.random.randint(0, 99)``, whose
        upper bound is exclusive, so it lies in 0..98.
        """
        first_name = self.faker.first_name()
        last_name = self.faker.last_name()
        # The earlier version also called gmail_email() here and discarded
        # the result; that wasted a user_name() draw and has been removed.
        suffix = np.random.randint(0, 99)
        return f"{first_name}{last_name}{suffix}@gmail.com"
fake = Faker()
fake.add_provider(CustomProvider)
fake_email = fake.gmail_email()
fake_first_name_last_name_email = fake.first_name_last_name_email()
## I am keeping this only as a reference; I ended up taking a different approach.
fake_first_name_last_name_email.lower()
'nicolevaldez98@gmail.com'
fake.gmail_email()
'robertjackson@gmail.com'
df2['email']=df2.apply(lambda x: fake.gmail_email(), axis=1)
df2
| address | geometry | ||
|---|---|---|---|
| 70846 | 6739 DAWSON ST | POINT (-123.04939 49.22227) | naguilar@gmail.com |
| 8173 | 3541 W 14TH AV | POINT (-123.18292 49.26004) | yyoung@gmail.com |
| 21773 | 2086 E 35TH AV | POINT (-123.06379 49.23791) | melissa03@gmail.com |
| 34921 | 504 E 29TH AV | POINT (-123.09397 49.24430) | moraleskarina@gmail.com |
| 52167 | 5015 ROSS ST | POINT (-123.08269 49.23913) | smithchristian@gmail.com |
| ... | ... | ... | ... |
| 50952 | 4194 INVERNESS ST | POINT (-123.07968 49.24765) | walter19@gmail.com |
| 75230 | 567 W 17TH AV | POINT (-123.11678 49.25629) | richardfaulkner@gmail.com |
| 14600 | 1988 W 42ND AV | POINT (-123.15144 49.23328) | kadkins@gmail.com |
| 29592 | 708 DENMAN ST | POINT (-123.13439 49.29186) | coliver@gmail.com |
| 13527 | 6422 LABURNUM ST | POINT (-123.15050 49.22800) | susanking@gmail.com |
10000 rows × 3 columns
# Discard the user-name based e-mail column generated above.
df2=df2.drop(['email'],axis=1)
# Abandoned attempt to create first name, last name and e-mail in a single pass.
# The e-mail is built from a second, independent pair of fake.first_name() /
# fake.last_name() calls, so it does not match the names stored in the same row.
# NOTE(review): df3 is not assigned anywhere above this line in the visible code,
# and `pd` is not imported in the visible code either — confirm before re-running.
df3[['first_name', 'last_name', 'email']] = df2.apply(lambda x: pd.Series([fake.first_name(), fake.last_name(), f"{fake.first_name()}{fake.last_name()}@gmail.com"]), axis=1)
df3 # It was still not working at this point
| address | geometry | first_name | last_name | ||
|---|---|---|---|---|---|
| 70846 | 6739 DAWSON ST | POINT (-123.04939 49.22227) | KyleMeyer@gmail.com | Nicole | Ayers |
| 8173 | 3541 W 14TH AV | POINT (-123.18292 49.26004) | JohnBest@gmail.com | Tammy | Johnson |
| 21773 | 2086 E 35TH AV | POINT (-123.06379 49.23791) | KristinaWilliams@gmail.com | Jamie | Nelson |
| 34921 | 504 E 29TH AV | POINT (-123.09397 49.24430) | NicoleCallahan@gmail.com | Charlene | Garcia |
| 52167 | 5015 ROSS ST | POINT (-123.08269 49.23913) | TamaraBrown@gmail.com | Karen | Elliott |
| ... | ... | ... | ... | ... | ... |
| 50952 | 4194 INVERNESS ST | POINT (-123.07968 49.24765) | LarryBenson@gmail.com | Samantha | Hess |
| 75230 | 567 W 17TH AV | POINT (-123.11678 49.25629) | JenniferMorris@gmail.com | Natalie | Morris |
| 14600 | 1988 W 42ND AV | POINT (-123.15144 49.23328) | PeggyCox@gmail.com | Debbie | Howell |
| 29592 | 708 DENMAN ST | POINT (-123.13439 49.29186) | LisaWilliams@gmail.com | Scott | Patel |
| 13527 | 6422 LABURNUM ST | POINT (-123.15050 49.22800) | MicheleSnyder@gmail.com | Charles | Carpenter |
10000 rows × 5 columns
# One fake first and last name per row.
df2['first_name'] = df2.apply(lambda x: fake.first_name(), axis=1)
df2['last_name'] = df2.apply(lambda x: fake.last_name(), axis=1)
# Build the e-mail from the names already stored in the row. The random suffix
# is drawn inside the lambda so every row gets its own number. The removed
# column-wise version evaluated np.random.randint() once and gave every address
# the same suffix, and was overwritten by this statement anyway.
df2['email'] = df2.apply(
    lambda row: row['first_name'] + row['last_name'] + str(np.random.randint(0, 99)) + '@gmail.com',
    axis=1,
)
df2
| address | geometry | first_name | last_name | ||
|---|---|---|---|---|---|
| 70846 | 6739 DAWSON ST | POINT (-123.04939 49.22227) | Nicole | Ayers | NicoleAyers52@gmail.com |
| 8173 | 3541 W 14TH AV | POINT (-123.18292 49.26004) | Tammy | Johnson | TammyJohnson67@gmail.com |
| 21773 | 2086 E 35TH AV | POINT (-123.06379 49.23791) | Jamie | Nelson | JamieNelson64@gmail.com |
| 34921 | 504 E 29TH AV | POINT (-123.09397 49.24430) | Charlene | Garcia | CharleneGarcia66@gmail.com |
| 52167 | 5015 ROSS ST | POINT (-123.08269 49.23913) | Karen | Elliott | KarenElliott9@gmail.com |
| ... | ... | ... | ... | ... | ... |
| 50952 | 4194 INVERNESS ST | POINT (-123.07968 49.24765) | Samantha | Hess | SamanthaHess0@gmail.com |
| 75230 | 567 W 17TH AV | POINT (-123.11678 49.25629) | Natalie | Morris | NatalieMorris45@gmail.com |
| 14600 | 1988 W 42ND AV | POINT (-123.15144 49.23328) | Debbie | Howell | DebbieHowell40@gmail.com |
| 29592 | 708 DENMAN ST | POINT (-123.13439 49.29186) | Scott | Patel | ScottPatel91@gmail.com |
| 13527 | 6422 LABURNUM ST | POINT (-123.15050 49.22800) | Charles | Carpenter | CharlesCarpenter37@gmail.com |
10000 rows × 5 columns
# Lower-case the whole column at once with the vectorised string accessor
# instead of a Python-level apply over every row.
df2['email'] = df2['email'].str.lower()
df3=df2
df3.columns
Index(['address', 'geometry', 'first_name', 'last_name', 'email'], dtype='object')
df3=df3[['first_name','last_name','email','address','geometry']]
df3
| first_name | last_name | address | geometry | ||
|---|---|---|---|---|---|
| 70846 | Nicole | Ayers | nicoleayers52@gmail.com | 6739 DAWSON ST | POINT (-123.04939 49.22227) |
| 8173 | Tammy | Johnson | tammyjohnson67@gmail.com | 3541 W 14TH AV | POINT (-123.18292 49.26004) |
| 21773 | Jamie | Nelson | jamienelson64@gmail.com | 2086 E 35TH AV | POINT (-123.06379 49.23791) |
| 34921 | Charlene | Garcia | charlenegarcia66@gmail.com | 504 E 29TH AV | POINT (-123.09397 49.24430) |
| 52167 | Karen | Elliott | karenelliott9@gmail.com | 5015 ROSS ST | POINT (-123.08269 49.23913) |
| ... | ... | ... | ... | ... | ... |
| 50952 | Samantha | Hess | samanthahess0@gmail.com | 4194 INVERNESS ST | POINT (-123.07968 49.24765) |
| 75230 | Natalie | Morris | nataliemorris45@gmail.com | 567 W 17TH AV | POINT (-123.11678 49.25629) |
| 14600 | Debbie | Howell | debbiehowell40@gmail.com | 1988 W 42ND AV | POINT (-123.15144 49.23328) |
| 29592 | Scott | Patel | scottpatel91@gmail.com | 708 DENMAN ST | POINT (-123.13439 49.29186) |
| 13527 | Charles | Carpenter | charlescarpenter37@gmail.com | 6422 LABURNUM ST | POINT (-123.15050 49.22800) |
10000 rows × 5 columns
def generate_random_time(_, start='10:00:00', end='22:00:00'):
    """Return a random time of day as a ``pandas.Timedelta``.

    Parameters
    ----------
    _ : any
        Ignored; present so the function can be passed to ``DataFrame.apply``.
    start, end : str, optional
        Window bounds in a form accepted by ``pd.to_timedelta``. ``start`` is
        inclusive and ``end`` is exclusive. The defaults keep the original
        10:00:00-22:00:00 window.

    Returns
    -------
    pandas.Timedelta
        A whole number of seconds with ``start <= result < end``.
    """
    # total_seconds() returns a float; np.random.randint works on integers,
    # so convert explicitly instead of relying on implicit truncation.
    start_seconds = int(pd.to_timedelta(start).total_seconds())
    end_seconds = int(pd.to_timedelta(end).total_seconds())
    random_seconds = np.random.randint(start_seconds, end_seconds)
    return pd.to_timedelta(random_seconds, unit='s')
# Take a real copy; plain `df4 = df3` only creates a second name for the same
# object, so later column additions would also appear on df3.
df4 = df3.copy()
import random
# One random time of day (a Timedelta) per row.
df4['date'] = df4.apply(lambda row: generate_random_time(row), axis=1)
# A Timedelta renders as "0 days HH:MM:SS"; keep only the "HH:MM:SS" part.
# The removed steps dropped the column and then read it back (KeyError), and
# Series.to_string() would have collapsed the whole column into one string.
df4['date'] = df4['date'].astype(str).str.replace('0 days ', '')
df4.head()
| first_name | last_name | address | geometry | date | ||
|---|---|---|---|---|---|---|
| 70846 | Nicole | Ayers | nicoleayers52@gmail.com | 6739 DAWSON ST | POINT (-123.04939 49.22227) | 19:10:23 |
| 8173 | Tammy | Johnson | tammyjohnson67@gmail.com | 3541 W 14TH AV | POINT (-123.18292 49.26004) | 18:21:34 |
| 21773 | Jamie | Nelson | jamienelson64@gmail.com | 2086 E 35TH AV | POINT (-123.06379 49.23791) | 12:53:15 |
| 34921 | Charlene | Garcia | charlenegarcia66@gmail.com | 504 E 29TH AV | POINT (-123.09397 49.24430) | 20:21:26 |
| 52167 | Karen | Elliott | karenelliott9@gmail.com | 5015 ROSS ST | POINT (-123.08269 49.23913) | 15:23:31 |
df4=df4.rename(columns={'date':'order_time'})
df4
| first_name | last_name | address | geometry | order_time | ||
|---|---|---|---|---|---|---|
| 70846 | Nicole | Ayers | nicoleayers52@gmail.com | 6739 DAWSON ST | POINT (-123.04939 49.22227) | 19:10:23 |
| 8173 | Tammy | Johnson | tammyjohnson67@gmail.com | 3541 W 14TH AV | POINT (-123.18292 49.26004) | 18:21:34 |
| 21773 | Jamie | Nelson | jamienelson64@gmail.com | 2086 E 35TH AV | POINT (-123.06379 49.23791) | 12:53:15 |
| 34921 | Charlene | Garcia | charlenegarcia66@gmail.com | 504 E 29TH AV | POINT (-123.09397 49.24430) | 20:21:26 |
| 52167 | Karen | Elliott | karenelliott9@gmail.com | 5015 ROSS ST | POINT (-123.08269 49.23913) | 15:23:31 |
| ... | ... | ... | ... | ... | ... | ... |
| 50952 | Samantha | Hess | samanthahess0@gmail.com | 4194 INVERNESS ST | POINT (-123.07968 49.24765) | 19:19:00 |
| 75230 | Natalie | Morris | nataliemorris45@gmail.com | 567 W 17TH AV | POINT (-123.11678 49.25629) | 21:45:45 |
| 14600 | Debbie | Howell | debbiehowell40@gmail.com | 1988 W 42ND AV | POINT (-123.15144 49.23328) | 11:56:04 |
| 29592 | Scott | Patel | scottpatel91@gmail.com | 708 DENMAN ST | POINT (-123.13439 49.29186) | 14:16:03 |
| 13527 | Charles | Carpenter | charlescarpenter37@gmail.com | 6422 LABURNUM ST | POINT (-123.15050 49.22800) | 13:51:45 |
10000 rows × 6 columns
def generate_random_datetime2(_, start='2023-01-01', end='2023-12-31'):
    """Return a random calendar day between ``start`` and ``end`` (inclusive).

    Parameters
    ----------
    _ : any
        Ignored; present so the function can be passed to ``DataFrame.apply``.
    start, end : str, optional
        Range bounds in a form accepted by ``pd.to_datetime``. The defaults
        keep the original range, all of 2023.

    Returns
    -------
    pandas.Timestamp
        ``start`` plus a whole number of days, never later than ``end``.

    Raises
    ------
    ValueError
        If ``end`` is earlier than ``start`` (there is nothing to choose from).
    """
    start_date = pd.to_datetime(start)
    end_date = pd.to_datetime(end)
    # Pick a day offset directly instead of building the full date_range on
    # every call; this function is applied once per row.
    n_days = (end_date - start_date).days + 1
    return start_date + pd.Timedelta(days=random.randrange(n_days))
#Now we create the function to generate daily transactions of our fake shop's operation
2023-08-02 00:00:00
df4['order_date']=df4.apply(lambda row: generate_random_datetime2(row), axis=1)
df4
| first_name | last_name | address | geometry | order_time | order_date | ||
|---|---|---|---|---|---|---|---|
| 70846 | Nicole | Ayers | nicoleayers52@gmail.com | 6739 DAWSON ST | POINT (-123.04939 49.22227) | 19:10:23 | 2023-11-24 |
| 8173 | Tammy | Johnson | tammyjohnson67@gmail.com | 3541 W 14TH AV | POINT (-123.18292 49.26004) | 18:21:34 | 2023-09-15 |
| 21773 | Jamie | Nelson | jamienelson64@gmail.com | 2086 E 35TH AV | POINT (-123.06379 49.23791) | 12:53:15 | 2023-05-12 |
| 34921 | Charlene | Garcia | charlenegarcia66@gmail.com | 504 E 29TH AV | POINT (-123.09397 49.24430) | 20:21:26 | 2023-06-27 |
| 52167 | Karen | Elliott | karenelliott9@gmail.com | 5015 ROSS ST | POINT (-123.08269 49.23913) | 15:23:31 | 2023-11-20 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 50952 | Samantha | Hess | samanthahess0@gmail.com | 4194 INVERNESS ST | POINT (-123.07968 49.24765) | 19:19:00 | 2023-09-29 |
| 75230 | Natalie | Morris | nataliemorris45@gmail.com | 567 W 17TH AV | POINT (-123.11678 49.25629) | 21:45:45 | 2023-06-07 |
| 14600 | Debbie | Howell | debbiehowell40@gmail.com | 1988 W 42ND AV | POINT (-123.15144 49.23328) | 11:56:04 | 2023-05-31 |
| 29592 | Scott | Patel | scottpatel91@gmail.com | 708 DENMAN ST | POINT (-123.13439 49.29186) | 14:16:03 | 2023-11-11 |
| 13527 | Charles | Carpenter | charlescarpenter37@gmail.com | 6422 LABURNUM ST | POINT (-123.15050 49.22800) | 13:51:45 | 2023-07-17 |
10000 rows × 7 columns
df4
| first_name | last_name | address | geometry | order_time | order_date | ||
|---|---|---|---|---|---|---|---|
| 70846 | Nicole | Ayers | nicoleayers52@gmail.com | 6739 DAWSON ST | POINT (-123.04939 49.22227) | 19:10:23 | 2023-11-24 |
| 8173 | Tammy | Johnson | tammyjohnson67@gmail.com | 3541 W 14TH AV | POINT (-123.18292 49.26004) | 18:21:34 | 2023-09-15 |
| 21773 | Jamie | Nelson | jamienelson64@gmail.com | 2086 E 35TH AV | POINT (-123.06379 49.23791) | 12:53:15 | 2023-05-12 |
| 34921 | Charlene | Garcia | charlenegarcia66@gmail.com | 504 E 29TH AV | POINT (-123.09397 49.24430) | 20:21:26 | 2023-06-27 |
| 52167 | Karen | Elliott | karenelliott9@gmail.com | 5015 ROSS ST | POINT (-123.08269 49.23913) | 15:23:31 | 2023-11-20 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 50952 | Samantha | Hess | samanthahess0@gmail.com | 4194 INVERNESS ST | POINT (-123.07968 49.24765) | 19:19:00 | 2023-09-29 |
| 75230 | Natalie | Morris | nataliemorris45@gmail.com | 567 W 17TH AV | POINT (-123.11678 49.25629) | 21:45:45 | 2023-06-07 |
| 14600 | Debbie | Howell | debbiehowell40@gmail.com | 1988 W 42ND AV | POINT (-123.15144 49.23328) | 11:56:04 | 2023-05-31 |
| 29592 | Scott | Patel | scottpatel91@gmail.com | 708 DENMAN ST | POINT (-123.13439 49.29186) | 14:16:03 | 2023-11-11 |
| 13527 | Charles | Carpenter | charlescarpenter37@gmail.com | 6422 LABURNUM ST | POINT (-123.15050 49.22800) | 13:51:45 | 2023-07-17 |
10000 rows × 7 columns
df4['order_price']=df4.apply(lambda row: round(random.uniform(5.00, 99.99), 2), axis=1)
df4
| first_name | last_name | address | geometry | order_time | order_date | order_price | ||
|---|---|---|---|---|---|---|---|---|
| 70846 | Nicole | Ayers | nicoleayers52@gmail.com | 6739 DAWSON ST | POINT (-123.04939 49.22227) | 19:10:23 | 2023-11-24 | 67.53 |
| 8173 | Tammy | Johnson | tammyjohnson67@gmail.com | 3541 W 14TH AV | POINT (-123.18292 49.26004) | 18:21:34 | 2023-09-15 | 45.75 |
| 21773 | Jamie | Nelson | jamienelson64@gmail.com | 2086 E 35TH AV | POINT (-123.06379 49.23791) | 12:53:15 | 2023-05-12 | 30.66 |
| 34921 | Charlene | Garcia | charlenegarcia66@gmail.com | 504 E 29TH AV | POINT (-123.09397 49.24430) | 20:21:26 | 2023-06-27 | 58.87 |
| 52167 | Karen | Elliott | karenelliott9@gmail.com | 5015 ROSS ST | POINT (-123.08269 49.23913) | 15:23:31 | 2023-11-20 | 16.55 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 50952 | Samantha | Hess | samanthahess0@gmail.com | 4194 INVERNESS ST | POINT (-123.07968 49.24765) | 19:19:00 | 2023-09-29 | 47.40 |
| 75230 | Natalie | Morris | nataliemorris45@gmail.com | 567 W 17TH AV | POINT (-123.11678 49.25629) | 21:45:45 | 2023-06-07 | 68.75 |
| 14600 | Debbie | Howell | debbiehowell40@gmail.com | 1988 W 42ND AV | POINT (-123.15144 49.23328) | 11:56:04 | 2023-05-31 | 58.84 |
| 29592 | Scott | Patel | scottpatel91@gmail.com | 708 DENMAN ST | POINT (-123.13439 49.29186) | 14:16:03 | 2023-11-11 | 75.50 |
| 13527 | Charles | Carpenter | charlescarpenter37@gmail.com | 6422 LABURNUM ST | POINT (-123.15050 49.22800) | 13:51:45 | 2023-07-17 | 37.56 |
10000 rows × 8 columns
import requests
api_key = "{YOUR_KEY}"
address = "6739 Dawson st ,Vancouver, BC"
url = "https://dev.virtualearth.net/REST/v1/Locations"
# Let requests build the query string so spaces, commas and any reserved
# characters in the address are URL-encoded. The timeout stops a stalled
# connection from hanging the session.
response = requests.get(url, params={"q": address, "key": api_key}, timeout=10)
response.json()
{'authenticationResultCode': 'ValidCredentials',
'brandLogoUri': 'http://dev.virtualearth.net/Branding/logo_powered_by.png',
'copyright': 'Copyright © 2023 Microsoft and its suppliers. All rights reserved. This API cannot be accessed and the content and any results may not be used, reproduced or transmitted in any manner without express written permission from Microsoft Corporation.',
'resourceSets': [{'estimatedTotal': 1,
'resources': [{'__type': 'Location:http://schemas.microsoft.com/search/local/ws/rest/v1',
'bbox': [49.218403082429326,
-123.05726187870246,
49.22612851757068,
-123.04149192129753],
'name': '6739 Dawson St, Vancouver, BC V5S 2W4, Canada',
'point': {'type': 'Point', 'coordinates': [49.2222658, -123.0493769]},
'address': {'addressLine': '6739 Dawson St',
'adminDistrict': 'BC',
'adminDistrict2': 'Greater Vancouver',
'countryRegion': 'Canada',
'formattedAddress': '6739 Dawson St, Vancouver, BC V5S 2W4, Canada',
'locality': 'Vancouver',
'neighborhood': 'Killarney',
'postalCode': 'V5S 2W4'},
'confidence': 'High',
'entityType': 'Address',
'geocodePoints': [{'type': 'Point',
'coordinates': [49.2222658, -123.0493769],
'calculationMethod': 'Rooftop',
'usageTypes': ['Display']},
{'type': 'Point',
'coordinates': [49.2222555, -123.0489161],
'calculationMethod': 'Rooftop',
'usageTypes': ['Route']}],
'matchCodes': ['Good']}]}],
'statusCode': 200,
'statusDescription': 'OK',
'traceId': '78f96a75b58046dca3dc4130470b0392|MWH0032BEE|0.0.0.1|Ref A: 61AEBC11FCC74F09ABA746133B233313 Ref B: CO1EDGE1414 Ref C: 2023-09-18T18:22:35Z'}
if response.status_code == 200:
    data = response.json()
    try:
        zip_code = data["resourceSets"][0]["resources"][0]["address"]["postalCode"]
    except (KeyError, IndexError):
        # KeyError: an expected field is missing from the payload.
        # IndexError: "resourceSets" or "resources" is an empty list.
        print("Zip code not found in the response.")
    else:
        print("Zip Code:", zip_code)
else:
    print("Request failed with status code:", response.status_code)
Zip Code: V5S 2W4
# df4['zip_code'] = df4.apply(lambda row: requests.get(f"https://dev.virtualearth.net/REST/v1/Locations?q={row['address']}&key={api_key}").json()["resourceSets"][0]["resources"][0]["address"]["postalCode"] if requests.get(f"https://dev.virtualearth.net/REST/v1/Locations?q={row['address']}&key={api_key}").status_code == 200 else np.nan, axis=1)
df4
| first_name | last_name | address | geometry | order_time | order_date | order_price | ||
|---|---|---|---|---|---|---|---|---|
| 70846 | Nicole | Ayers | nicoleayers52@gmail.com | 6739 DAWSON ST | POINT (-123.04939 49.22227) | 19:10:23 | 2023-11-24 | 67.53 |
| 8173 | Tammy | Johnson | tammyjohnson67@gmail.com | 3541 W 14TH AV | POINT (-123.18292 49.26004) | 18:21:34 | 2023-09-15 | 45.75 |
| 21773 | Jamie | Nelson | jamienelson64@gmail.com | 2086 E 35TH AV | POINT (-123.06379 49.23791) | 12:53:15 | 2023-05-12 | 30.66 |
| 34921 | Charlene | Garcia | charlenegarcia66@gmail.com | 504 E 29TH AV | POINT (-123.09397 49.24430) | 20:21:26 | 2023-06-27 | 58.87 |
| 52167 | Karen | Elliott | karenelliott9@gmail.com | 5015 ROSS ST | POINT (-123.08269 49.23913) | 15:23:31 | 2023-11-20 | 16.55 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 50952 | Samantha | Hess | samanthahess0@gmail.com | 4194 INVERNESS ST | POINT (-123.07968 49.24765) | 19:19:00 | 2023-09-29 | 47.40 |
| 75230 | Natalie | Morris | nataliemorris45@gmail.com | 567 W 17TH AV | POINT (-123.11678 49.25629) | 21:45:45 | 2023-06-07 | 68.75 |
| 14600 | Debbie | Howell | debbiehowell40@gmail.com | 1988 W 42ND AV | POINT (-123.15144 49.23328) | 11:56:04 | 2023-05-31 | 58.84 |
| 29592 | Scott | Patel | scottpatel91@gmail.com | 708 DENMAN ST | POINT (-123.13439 49.29186) | 14:16:03 | 2023-11-11 | 75.50 |
| 13527 | Charles | Carpenter | charlescarpenter37@gmail.com | 6422 LABURNUM ST | POINT (-123.15050 49.22800) | 13:51:45 | 2023-07-17 | 37.56 |
10000 rows × 8 columns
# df5 has the zip codes; now add df7, now add df8.
df8 = df4.sample(1000)
# Use HTTPS so the API key in the query string is not sent in clear text, and
# set a timeout so a stalled connection cannot hang the session.
response = requests.get(f'https://dev.virtualearth.net/REST/v1/Locations/CA/BC/Vancouver/3475 W 11th AV?includeNeighborhood=true&key={api_key}', timeout=10)
response.json()["resourceSets"][0]["resources"][0]["address"]["postalCode"]
'V6R 2K1'
def get_zip_code(row):
    """Look up the postal code for ``row["address"]`` in Vancouver, BC.

    Sends one request to the Locations REST endpoint using the module-level
    ``api_key`` and reads ``resourceSets[0].resources[0].address.postalCode``
    from the JSON reply.

    Parameters
    ----------
    row : mapping
        Anything indexable with ``"address"`` (e.g. a DataFrame row).

    Returns
    -------
    str or float
        The postal code, or ``np.nan`` when the request fails, the status is
        not 200, the body is not JSON, or no postal code is present.
    """
    from urllib.parse import quote

    # Percent-encode the street address so characters such as '/', '#' or '?'
    # cannot change the structure of the URL path.
    street = quote(str(row["address"]), safe="")
    # HTTPS keeps the API key in the query string off the wire in clear text.
    url = f'https://dev.virtualearth.net/REST/v1/Locations/CA/BC/Vancouver/{street}?includeNeighborhood=true&key={api_key}'
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # Best-effort lookup: one failed request must not abort a whole apply().
        return np.nan
    if response.status_code != 200:
        return np.nan
    try:
        data = response.json()
    except ValueError:
        return np.nan

    resource_sets = data.get("resourceSets", [])
    if not resource_sets:
        return np.nan
    resources = resource_sets[0].get("resources", [])
    if not resources:
        return np.nan
    postal_code = resources[0].get("address", {}).get("postalCode")
    return postal_code if postal_code else np.nan
df8['zip_code'] = df8.apply(get_zip_code, axis=1)
df5
| first_name | last_name | address | geometry | order_time | order_date | order_price | zip_code | ||
|---|---|---|---|---|---|---|---|---|---|
| 18955 | Bradley | Brown | bradleybrown88@gmail.com | 1488 W 37TH AV | POINT (-123.13854 49.23774) | 12:56:45 | 2023-12-22 | 41.00 | V6M 1M3 |
| 91319 | Joseph | Ingram | josephingram83@gmail.com | 755 E BROADWAY | POINT (-123.08782 49.26276) | 21:01:49 | 2023-07-03 | 76.16 | V5T 1X8 |
| 62280 | James | Lewis | jameslewis41@gmail.com | 1521 COMMERCIAL DRIVE | POINT (-123.06994 49.27102) | 13:34:17 | 2023-06-09 | 10.94 | V5L 3Y1 |
| 50227 | Sarah | Mooney | sarahmooney34@gmail.com | 633 BUCKETWHEEL | POINT (-123.11823 49.26800) | 12:02:57 | 2023-01-27 | 13.16 | NaN |
| 29038 | Jeffrey | Clark | jeffreyclark66@gmail.com | 2103 W 49TH AV | POINT (-123.15614 49.22742) | 13:40:17 | 2023-12-12 | 13.08 | V6M 2T5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 62803 | Thomas | Moore | thomasmoore53@gmail.com | 5569 BRUCE ST | POINT (-123.07051 49.23394) | 19:37:01 | 2023-05-21 | 86.29 | V5P 3M5 |
| 18044 | Lori | Mccoy | lorimccoy28@gmail.com | 5898 QUEBEC ST | POINT (-123.10327 49.23106) | 18:29:03 | 2023-08-26 | 85.22 | V5W 2P1 |
| 33746 | Linda | Callahan | lindacallahan45@gmail.com | 2888 W 39TH AV | POINT (-123.16963 49.23639) | 14:04:52 | 2023-06-06 | 11.61 | V6N 2Z4 |
| 70413 | Laura | Carr | lauracarr2@gmail.com | 3042 E 28TH AV | POINT (-123.04046 49.24515) | 13:00:45 | 2023-04-11 | 95.76 | V5R 1S5 |
| 69679 | Eric | Smith | ericsmith13@gmail.com | 3225 E 25TH AV | POINT (-123.03580 49.24812) | 18:16:21 | 2023-09-17 | 52.91 | V5R 1J5 |
1000 rows × 9 columns
df7
| first_name | last_name | address | geometry | order_time | order_date | order_price | zip_code | ||
|---|---|---|---|---|---|---|---|---|---|
| 37288 | Mark | French | markfrench97@gmail.com | 6558 CLARENDON ST | POINT (-123.05582 49.22430) | 19:57:57 | 2023-06-19 | 80.77 | V5S 2K3 |
| 79299 | Donna | James | donnajames6@gmail.com | 1019 PACIFIC ST | POINT (-123.13292 49.27771) | 15:09:11 | 2023-08-11 | 57.62 | V6E 1S9 |
| 62146 | Ashley | Johnson | ashleyjohnson53@gmail.com | 1429 E 4TH AV | POINT (-123.07503 49.26718) | 13:43:04 | 2023-01-09 | 18.91 | V5N 1J6 |
| 12307 | Allison | Tucker | allisontucker43@gmail.com | 3375 W 12TH AV | POINT (-123.17926 49.26181) | 19:36:20 | 2023-07-10 | 40.15 | V6R 2M8 |
| 54163 | Sara | Allen | saraallen9@gmail.com | 2606 KEITH DRIVE | POINT (-123.07911 49.26128) | 14:03:56 | 2023-01-03 | 40.08 | V5T 4C6 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 43195 | Charles | Nelson | charlesnelson79@gmail.com | 2237 NEWPORT AV | POINT (-123.06133 49.21552) | 20:33:55 | 2023-09-25 | 67.28 | V5P 2J1 |
| 77200 | Theresa | Villa | theresavilla87@gmail.com | 3363 VENABLES ST | POINT (-123.03224 49.27685) | 19:43:23 | 2023-07-22 | 44.41 | V5K 2S7 |
| 2735 | Brooke | Jones | brookejones87@gmail.com | 1923 E 5TH AV | POINT (-123.06522 49.26619) | 18:12:41 | 2023-07-27 | 46.92 | V5N 1M1 |
| 90100 | Lisa | Hernandez | lisahernandez7@gmail.com | 6445 WINDSOR ST | POINT (-123.08568 49.22600) | 17:41:40 | 2023-07-08 | 49.97 | V5W 3J4 |
| 71223 | Brent | Collins | brentcollins52@gmail.com | 837 WINDERMERE ST | POINT (-123.03670 49.27678) | 11:55:01 | 2023-05-15 | 57.74 | V5K 4J5 |
1000 rows × 9 columns
df8
| first_name | last_name | address | geometry | order_time | order_date | order_price | zip_code | ||
|---|---|---|---|---|---|---|---|---|---|
| 55204 | Juan | Carlson | juancarlson33@gmail.com | 85 W 62ND AV | POINT (-123.10820 49.21448) | 10:17:49 | 2023-11-24 | 50.76 | V5X 2C9 |
| 97233 | Kevin | Johnson | kevinjohnson15@gmail.com | 3450 PRICE ST | POINT (-123.03043 49.24198) | 11:19:42 | 2023-07-10 | 26.14 | V5R 5R1 |
| 53122 | Brad | Prince | bradprince23@gmail.com | 5121 ELGIN ST | POINT (-123.08387 49.23807) | 20:46:02 | 2023-02-02 | 93.07 | V5W 3J9 |
| 17666 | Kathryn | Simmons | kathrynsimmons33@gmail.com | 131 E 27TH AV | POINT (-123.10269 49.24747) | 17:43:45 | 2023-06-27 | 92.40 | V5V 2K5 |
| 32651 | Daniel | Harrison | danielharrison25@gmail.com | 955 W 18TH AV | POINT (-123.12583 49.25556) | 15:19:51 | 2023-05-24 | 89.89 | V5Z 1W4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 96703 | Joshua | Gray | joshuagray36@gmail.com | 4586 HOY ST | POINT (-123.02460 49.24362) | 19:49:47 | 2023-11-13 | 75.13 | V5R 4N4 |
| 19781 | Henry | Stone | henrystone58@gmail.com | 1269 W 27TH AV | POINT (-123.13357 49.24758) | 20:56:08 | 2023-01-08 | 56.87 | V6H 2C1 |
| 37945 | Mitchell | Mason | mitchellmason68@gmail.com | 2069 E BROADWAY | POINT (-123.06270 49.26257) | 10:37:31 | 2023-02-02 | 9.68 | V5N 1W6 |
| 35492 | Ryan | Blankenship | ryanblankenship20@gmail.com | 160 E PENDER ST | POINT (-123.10097 49.28015) | 12:58:30 | 2023-02-11 | 78.42 | V6A 1T3 |
| 51989 | Dawn | Bowen | dawnbowen2@gmail.com | 3698 GLEN DRIVE | POINT (-123.08152 49.25190) | 12:51:07 | 2023-01-11 | 38.16 | V5V 4S6 |
1000 rows × 9 columns
# Stack the three geocoded samples into one table.
# NOTE(review): df5 and df7 are never assigned in the visible code — presumably
# they come from earlier runs of the df8 sampling/geocoding steps under other
# names; confirm before re-running.
df_final=pd.concat([df5,df7,df8])
# Remove rows that are exact duplicates of an earlier row.
df_final=df_final.drop_duplicates()
df_final.shape
(2717, 9)
df_final.to_csv('C:/Users/OWNER/Downloads/proyecto_base_datos/vancouver_2700_no_idx.csv', index=False)
df_final.to_csv('C:/Users/OWNER/Downloads/proyecto_base_datos/vancouver_2700_idx.csv', index=True)