How to retrieve files from the platform
In this guide, you will learn how to retrieve files from Energyworx's file storage using the API v1.5. There are other ways to authenticate against the API, but in this guide, we will use an API token.
Generating the API Token
Note that this step requires admin rights. If you do not have sufficient permissions, please reach out to your project admin to discuss your needs.
Use the POST API call /ewx/v1/admin/user/api with the user ID (found in the Users section under Identity Access Management). This will generate an apiKeyToken, which you need to authenticate. Mind that you do not share this token with others, as it identifies you when making calls to the API.
Information you need
We have provided an example script below to get you started. However, to further enable you, we will cover the script variables so you can customize the script to your needs.
- project : the environment you will be downloading the files from — mind, this is not the URL or namespace, but the project ID specified in GCP.
- namespace_id : the namespace ID where the files are located, e.g. enrx_org_001
- api_key : the key generated in the first step
- download_destination_folder : the folder where you want to store the file(s). If it does not exist, it will be created.
- params : this enables you to set search parameters for the files you are targeting. All available parameters can be found in the API documentation. The most relevant ones are filename and tags.
- limit : number of files to download that match the search query (params)
Example Script
from pathlib import Path
import requests
import ssl
import typing
import urllib.request
from urllib.parse import unquote, urlparse
import numpy as np
from tqdm import tqdm
# Variables

# GCP project ID of the target environment (not the URL or namespace).
project: str = "PROJECT ID"
# Namespace ID where the files are located, e.g. enrx_org_001.
namespace_id: str = "NAMESPACE ID"
# API key generated via POST /ewx/v1/admin/user/api — keep it secret.
api_key: str = "GENERATED API KEY"
# Local folder the files are saved to; created if it does not exist.
download_destination_folder: str = "DESTINATION FOLDER"
# Search parameters for the file search; see the API docs for all options.
params: dict = {'filename': 'Actual.csv',
                'tags': ['month:april']}
# Maximum number of matching files to download.
limit: int = 1
def get_client_session(project: str, namespace_id: str, key: str | None = None):
    """Create a ``requests.Session`` pre-configured for the Energyworx API.

    Also sets the module-level ``base_url`` global, which the download
    helpers below read.

    :param project: GCP project ID of the target environment.
    :param namespace_id: namespace the requests are scoped to.
    :param key: API key used for authentication. Defaults to the
        module-level ``api_key`` variable for backward compatibility.
    :return: a session with the auth and namespace headers applied.
    """
    session = requests.Session()
    session.headers.update({
        "Accept": "application/json",
        "X-NAMESPACE": namespace_id,
        "X-API-KEY": api_key if key is None else key,
    })
    # Kept as a global because the module-level helpers depend on it.
    global base_url
    base_url = f"https://api.{project}.energyworx.net/ewx/v1/"
    return session
# Internal variables
session = get_client_session(project, namespace_id)
# Default context verifies certificates and hostnames, unlike the
# deprecated bare ssl.SSLContext(ssl.PROTOCOL_TLS) constructor.
context = ssl.create_default_context()
def iter_all_pages(url: str, static_params: dict, limit: int | None = None) -> typing.Iterator:
    """Yield every page of a paginated collection endpoint.

    :param url: collection URL to page through.
    :param static_params: query parameters applied to every page request.
    :param limit: maximum number of items to fetch; ``None`` means no limit.
    :yield: the decoded JSON payload of each page.
    """
    # float('inf') means "no limit" — stdlib replacement for np.infty,
    # which was removed in NumPy 2.0.
    remaining = float('inf') if limit is None else limit
    page_token = None
    more = True
    while more:
        page_size = min(100, remaining)  # request at most 100 items per page
        query = {"pageToken": page_token, "limit": page_size}
        query.update(static_params)
        data = session.get(url, params=query).json()
        yield data
        # Either key may carry the continuation token, depending on endpoint.
        page_token = data.get("pageToken") or data.get("nextPageToken")
        remaining -= page_size
        more = data.get("more") and remaining
def get_all_file_entries():
    """Yield every file entry returned by the storage search endpoint."""
    search_url = f"{base_url}storage/files/search"
    for page in iter_all_pages(search_url, params, limit):
        for entry in page.get('items', []):
            yield entry
def get_file_urls():
    """Return the signed download URL of every matching file entry."""
    return [entry['signedUrl'] for entry in get_all_file_entries()]
def download_files(file_urls: list[str], dest_dir: str):
    """Download each signed URL into *dest_dir*, creating it if needed.

    Files are fetched and written as raw bytes, so any content encoding
    (UTF-8, binary, ...) is preserved exactly; the previous ascii
    decode/re-encode crashed on any non-ASCII byte.

    :param file_urls: signed URLs as returned by the file-search endpoint.
    :param dest_dir: destination folder; created if it does not exist.
    """
    dest_path = Path(dest_dir)
    dest_path.mkdir(parents=True, exist_ok=True)
    print(f"Destination directory set to {dest_path.name}.")
    progress = tqdm(file_urls, desc='Downloading files...', dynamic_ncols=True)
    for file_url in progress:
        # Recover the original file name from the URL path.
        file_name = unquote(Path(urlparse(file_url).path).name)
        progress.set_postfix_str(file_name)
        with urllib.request.urlopen(file_url, context=context) as response:
            contents = response.read()
        # ':' is not a legal file-name character on some platforms.
        target = dest_path / file_name.replace(':', '_')
        target.write_bytes(contents)
def main():
    """Entry point: look up the matching files and download them all."""
    urls = get_file_urls()
    download_files(urls, download_destination_folder)


if __name__ == '__main__':
    main()