diff --git a/data_treatment.py b/data_treatment.py index 1d7b37e..a2882ea 100644 --- a/data_treatment.py +++ b/data_treatment.py @@ -1,10 +1,13 @@ import json +import os import pandas as pd import numpy as np import requests +from dotenv import load_dotenv +# Carregar variáveis de ambiente do arquivo .env +load_dotenv() -# ================================================================= # Tratamento de dados de vendas df_data = pd.read_csv("Dataset/nyc-rolling-sales.csv", index_col=0) @@ -13,15 +16,17 @@ df_data[object_cols] = df_data[object_cols].astype(np.float) df_data["SALE DATE"] = pd.to_datetime(df_data["SALE DATE"]) -df_data = df_data[df_data["SALE PRICE"] != 0] -df_data = df_data[df_data["LAND SQUARE FEET"] != 0] -df_data = df_data[df_data["GROSS SQUARE FEET"] != 0] +df_data = df_data[(df_data["SALE PRICE"] != 0) & (df_data["LAND SQUARE FEET"] != 0) & (df_data["GROSS SQUARE FEET"] != 0)] -# ================================================================= # Tratamento de dados LATITUDES e LONGITUDES -here_api = open("keys/here_api").read() -dict_address = json.load(open('dict_notes.json')) +# Chave da API do Google Maps +google_api_key = os.getenv("GOOGLE_API_KEY") + +# Carregar dicionário de endereços do arquivo JSON +with open('dict_notes.json') as json_file: + dict_address = json.load(json_file) + error = [] c = 0 total = len(df_data["ADDRESS"].unique()) @@ -30,34 +35,43 @@ try: if address in dict_address.keys(): continue - URL = "https://geocode.search.hereapi.com/v1/geocode" - location = address + ", NYC" - PARAMS = {'apikey':here_api,'q':location} - r = requests.get(url = URL, params = PARAMS) + + # Fazer solicitação HTTP para obter latitude e longitude + URL = "https://maps.googleapis.com/maps/api/geocode/json" + location = address + ", NYC" + PARAMS = {'address': location, 'key': google_api_key} + r = requests.get(url=URL, params=PARAMS) data = r.json() - lat = data['items'][0]['position']['lat'] - long = data['items'][0]['position']['lng'] - dict_address[address] = {"latitude": lat, "longitude": long} - with open('dict_notes.json', 'w') as f: - json.dump(dict_address, f) - + # Verificar se a resposta possui dados válidos + if 'results' in data and data['results']: + lat = data['results'][0]['geometry']['location']['lat'] + lng = data['results'][0]['geometry']['location']['lng'] + dict_address[address] = {"latitude": lat, "longitude": lng} + with open('dict_notes.json', 'w') as f: + json.dump(dict_address, f) + else: + error.append(address) except Exception as e: - print(e) - error += [address] + print("Erro ao processar endereço:", address, "-", e) + error.append(address) c += 1 print(c, total) -# =================================== # Tratamento final -dict_address = json.load(open('dict_notes.json')) +# Carregar dicionário de endereços do arquivo JSON novamente +with open('dict_notes.json') as json_file: + dict_address = json.load(json_file) -# LATITUDE AND LONGITUDE +# Extrair latitudes e longitudes do dicionário dict_lat = {key: value["latitude"] for key, value in dict_address.items()} dict_long = {key: value["longitude"] for key, value in dict_address.items()} +# Mapear latitudes e longitudes no dataframe df_data["LATITUDE"] = df_data["ADDRESS"].map(dict_lat) df_data["LONGITUDE"] = df_data["ADDRESS"].map(dict_long) + +# Salvar os dados tratados em um novo arquivo CSV df_data.to_csv("Dataset/cleaned_data.csv")