Clean commit - added metadata files to clean MAIN #27

Closed · wants to merge 1 commit
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,3 +1,6 @@
# System
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
3 changes: 3 additions & 0 deletions config/config.ini
@@ -3,9 +3,12 @@ default_profile=ci

[prod]
pennsieve_profile_name=prod
scicrunch_api_key=YOUR_KEY_HERE

[dev]
pennsieve_profile_name=test
scicrunch_api_key=YOUR_KEY_HERE

[ci]
pennsieve_profile_name=ci
scicrunch_api_key=YOUR_KEY_HERE
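
The scicrunch_api_key placeholder added to each profile above has to be replaced with a real key before the examples below will work. A minimal sanity-check sketch (the profile names come from this diff; the config path and everything else here is an assumption, not part of the PR):

# Sanity check that config/config.ini has a real SciCrunch API key in each profile.
# Profile names ([prod], [dev], [ci]) come from the diff above; the path is assumed
# to be relative to the repository root.
import configparser

config = configparser.ConfigParser()
config.read("config/config.ini")

for profile in ("prod", "dev", "ci"):
    key = config.get(profile, "scicrunch_api_key", fallback="")
    if not key or key == "YOUR_KEY_HERE":
        print(f"[{profile}] scicrunch_api_key is not set")
    else:
        print(f"[{profile}] scicrunch_api_key found")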
80 changes: 80 additions & 0 deletions docs/examples_metadata.py
@@ -0,0 +1,80 @@
# Set of example calls to the metadata services within the SPARC client
# Make sure to get an API key and add it to your config/config.ini file
# Add the key to the scicrunch_api_key attribute
# Instructions for getting an API key can be found at: https://fdilab.gitbook.io/api-handbook/sparc-k-core-api-overview/getting-started-with-sparc-apis

import json
from sparc.client import SparcClient
[reviewdog 🐶 formatters] Suggested a formatting-only change to this import line.


client = SparcClient(connect=False, config_file='../config/config.ini')

# Connect
response = client.metadata.connect()

if response == "https://scicrunch.org/api/1/elastic":
test_pass = True
else:
test_pass = False

print( str(test_pass) )

# Get Info
response = client.metadata.info()

if response == "https://scicrunch.org/api/1/elastic":
test_pass = True
else:
test_pass = False

print( str(test_pass) )

# ES list datasets
response = {}
response = client.metadata.list_datasets()

check_response = response['hits']['total']
if check_response > 200:
test_pass = True
else:
test_pass = False

print( str(test_pass) )

# ES search via default
response = {}
response = client.metadata.search_datasets()

check_response = response['hits']['total']
if check_response > 200:
test_pass = True
else:
test_pass = False

print( str(test_pass) )

# ES search via JSON string
response = {}
response = client.metadata.search_datasets("{\"query\": {\"terms\": {\"_id\": [ \"136\", \"95\" ] } } }")

check_response = response['hits']['total']
if check_response == 2:
test_pass = True
else:
test_pass = False

print( str(test_pass) )

# ES search via JSON object
response = {}
body = "{\"query\": {\"terms\": {\"_id\": [ \"136\", \"95\" ] } } }"
body_json = json.loads(body)

response = client.metadata.search_datasets(body_json)

check_response = response['hits']['total']
if check_response == 2:
test_pass = True
else:
test_pass = False

print( str(test_pass) )
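
The six checks above all follow the same pattern: call a metadata method, compare the result, and print True or False. A condensed sketch of the same checks, for illustration only (the check helper is hypothetical and not part of sparc.client or of this PR):

# Condensed version of the example checks above; `check` is a hypothetical helper.
import json
from sparc.client import SparcClient

client = SparcClient(connect=False, config_file='../config/config.ini')


def check(label, passed):
    # Mirrors the print(str(test_pass)) pattern used above, with a label added.
    print(label, passed)


endpoint = "https://scicrunch.org/api/1/elastic"
check("connect", client.metadata.connect() == endpoint)
check("info", client.metadata.info() == endpoint)

check("list_datasets", client.metadata.list_datasets()['hits']['total'] > 200)
check("search_datasets (default)", client.metadata.search_datasets()['hits']['total'] > 200)

body = '{"query": {"terms": {"_id": ["136", "95"]}}}'
check("search_datasets (JSON string)", client.metadata.search_datasets(body)['hits']['total'] == 2)
check("search_datasets (dict)", client.metadata.search_datasets(json.loads(body))['hits']['total'] == 2)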
281 changes: 281 additions & 0 deletions src/sparc/client/services/metadata.py
@@ -0,0 +1,281 @@
import logging
import json

import requests
from requests.adapters import HTTPAdapter, Retry

from configparser import SectionProxy
from typing import List, Optional, Union

from ._default import ServiceBase

[reviewdog 🐶 formatters] Comments on lines +1 and +7 to +8: suggested regrouping the imports so that standard-library, third-party, and local imports are separated.

class MetadataService(ServiceBase):
"""A wrapper for the Elasticsearch Metadata library

Parameters:
-----------
config : dict
A configuration containing necessary API key (scicrunch_api_key).
connect : bool
Not needed with REST metadata services.

Attributes:
-----------
default_headers : dict
A dictionary with headers to make HTTP requests.
host_api : str
A default HTTP address of the SciCrunch Elasticsearch API endpoint.

Methods:
--------
get_profile() -> str
Returns the currently used API Key.
set_profile() -> str
Changes the API Key.
close() : None
Not needed with REST metadata services.
getURL(...) : dict
Supporting function to retrieve data from REST endpoint via GET
This supports Elasticsearch URL-based queries
postURL(...) : dict
Supporting function to retrieve data from REST endpoint
This supports Elasticsearch JSON queries
list_datasets(...) : dict
Returns a dictionary with datasets metadata.
search_datasets(...) : dict
Returns a dictionary with datasets matching search criteria.

"""

default_headers = {
"Content-Type": "application/json",
"Accept": "application/json; charset=utf-8",
}

host_api = "https://scicrunch.org/api/1/elastic"

scicrunch_api_key: str = None
profile_name: str = None

def __init__(
self, config: Optional[Union[dict, SectionProxy]] = None, connect: bool = False
) -> None:
logging.info("Initializing SPARC K-Core Elasticsearch services...")
logging.debug(str(config))

if config is not None:
self.scicrunch_api_key = config.get("scicrunch_api_key")
logging.info("SciCrunch API Key: Found")
self.profile_name = config.get("pennsieve_profile_name")
logging.info("Profile: " + self.profile_name)
else:
logging.warning("SciCrunch API Key: Not Found")
logging.info("Profile: none")
if connect:
self.connect()

def connect(self) -> str:
""" Not needed as metadata services are REST service calls """
[reviewdog 🐶 blackfmt, formatters] Suggested removing the extra spaces just inside the docstring quotes.

logging.info("Metadata REST services available...")

self.host_api = "https://scicrunch.org/api/1/elastic"
return self.host_api

def info(self) -> str:
"""Returns information about the metadata search services."""

self.host_api = "https://scicrunch.org/api/1/elastic"
return self.host_api

def get_profile(self) -> str:
"""Returns currently used API key.

Returns:
--------
A string with API Key.
"""
return self.scicrunch_api_key

def set_profile(self, api_key: str) -> str:
"""Changes the API key to the specified name.

Parameters:
-----------
api_key : str
The API key to use.

Returns:
--------
A string with confirmation of API key switch.
"""
self.scicrunch_api_key = api_key
return self.scicrunch_api_key

def close(self) -> None:
"""Not needed as metadata services are REST service calls"""
return self.host_api

#####################################################################
# Supporting Functions
[reviewdog 🐶 blackfmt, formatters] Comment on lines +118 to +119: suggested a whitespace-only change around this banner comment.

#####################################################################
# Function to GET content from URL with retries
[reviewdog 🐶 blackfmt, formatters] Comment on lines +121 to +122: suggested a whitespace-only change around this banner comment.
def getURL(self, url, headers="NONE"):

[reviewdog 🐶 blackfmt, formatters] Suggested a whitespace-only change after the function signature.
result = "[ERROR]"
url_session = requests.Session()

retries = Retry(total=6,
backoff_factor=1,
status_forcelist=[403, 404, 413, 429, 500, 502, 503, 504])
[reviewdog 🐶 blackfmt, formatters] Comment on lines +128 to +130: suggested reformatting the Retry(...) call to black's line-wrapping style.

url_session.mount('https://', HTTPAdapter(max_retries=retries))
[reviewdog 🐶 blackfmt, formatters] Suggested double quotes in the url_session.mount(...) call.

success = 1

try:
if headers == "NONE":
url_result = url_session.get(url)
else:
url_result = url_session.get(url, headers=headers)

if url_result.status_code == 410:
logging.warning("Retrieval Status 410 - URL Unpublished:" + url)
else:
url_result.raise_for_status()

        except requests.exceptions.HTTPError as errh:
            # Use a %s placeholder so logging actually renders the exception in the message.
            logging.error("Retrieving URL - HTTP Error: %s", errh)
            success = 0
        except requests.exceptions.ConnectionError as errc:
            logging.error("Retrieving URL - Error Connecting: %s", errc)
            success = 0
        except requests.exceptions.Timeout as errt:
            logging.error("Retrieving URL - Timeout Error: %s", errt)
            success = 0
        except requests.exceptions.RequestException as err:
            logging.error("Retrieving URL - Something Else: %s", err)
            success = 0

url_session.close()

        if success == 1:
            result = url_result.json()
        else:
            # No usable response; return an empty dict instead of calling .json() on one.
            result = {}

        return result


#####################################################################
# Function to retrieve content via POST from URL with retries
[reviewdog 🐶 blackfmt, formatters] Comment on lines +169 to +171: suggested a whitespace-only change around this banner comment.
def postURL(self, url, body, headers="NONE"):

[reviewdog 🐶 blackfmt, formatters] Suggested a whitespace-only change after the function signature.
result = "[ERROR]"
url_session = requests.Session()

retries = Retry(total=6,
backoff_factor=1,
status_forcelist=[403, 404, 413, 429, 500, 502, 503, 504])
[reviewdog 🐶 blackfmt, formatters] Comment on lines +177 to +179: suggested reformatting the Retry(...) call to black's line-wrapping style.

url_session.mount('https://', HTTPAdapter(max_retries=retries))
[reviewdog 🐶 blackfmt, formatters] Suggested double quotes in the url_session.mount(...) call.

        try:
            if type(body) is dict:
                body_json = body
            else:
                body_json = json.loads(body)
        except (TypeError, ValueError):
            logging.error("Elasticsearch query body cannot be read")
            body_json = {}  # fall back to an empty body so the POST below does not hit a NameError

[reviewdog 🐶 blackfmt, formatters] Comment on lines +189 to +190: suggested a whitespace-only change to this logging call.
success = 1

try:
if headers == "NONE":
url_result = url_session.post(url, json = body_json)
[reviewdog 🐶 blackfmt, formatters] Suggested json=body_json (no spaces around = in the keyword argument).
else:
url_result = url_session.post(url, json = body_json, headers=headers)
[reviewdog 🐶 blackfmt, formatters] Suggested json=body_json here as well (no spaces around =).

if url_result.status_code == 410:
logging.warning("Retrieval Status 410 - URL Unpublished:" + url)
else:
url_result.raise_for_status()

        except requests.exceptions.HTTPError as errh:
            logging.error("Retrieving URL - HTTP Error: %s", errh)
            success = 0
        except requests.exceptions.ConnectionError as errc:
            logging.error("Retrieving URL - Error Connecting: %s", errc)
            success = 0
        except requests.exceptions.Timeout as errt:
            logging.error("Retrieving URL - Timeout Error: %s", errt)
            success = 0
        except requests.exceptions.RequestException as err:
            logging.error("Retrieving URL - Something Else: %s", err)
            success = 0

url_session.close()

        if success == 1:
            result = url_result.json()
        else:
            # No usable response; return an empty dict instead of calling .json() on one.
            result = {}

        return result

[reviewdog 🐶 blackfmt, formatters] Suggested a whitespace-only change after this function.

#####################################################################
# Metadata Search Functions
[reviewdog 🐶 blackfmt, formatters] Comment on lines +227 to +228: suggested a whitespace-only change around this banner comment.

def list_datasets(
self,
limit: int = 10,
offset: int = 0
) -> list:
[reviewdog 🐶 blackfmt, formatters] Comment on lines +230 to +234: suggested collapsing the signature to def list_datasets(self, limit: int = 10, offset: int = 0) -> list:
"""Lists datasets and associated metadata.

Parameters:
-----------
limit : int
Max number of datasets returned.
offset : int
Offset used for pagination of results.

Returns:
--------
A json with the results.

"""
self.host_api = "https://scicrunch.org/api/1/elastic/SPARC_Algolia_pr/_search"

list_url = self.host_api + "?" + "from=" + str(offset) + "&size=" + str(limit) + "&key=" + self.scicrunch_api_key
[reviewdog 🐶 blackfmt, formatters] Suggested wrapping the long list_url concatenation across multiple lines (black style).

list_results = self.getURL(list_url, headers=self.default_headers)
return list_results


def search_datasets(
self,
query: str = "{\"query\": { \"match_all\": {}}}"
) -> list:
"""Gets datasets matching specified query.

        This function provides access to the full Elasticsearch query syntax;
        the query can be passed either as a JSON string or as a dict.
[reviewdog 🐶 blackfmt, formatters] Comment on lines +257 to +263: suggested reformatting the signature and docstring opening (black style), with a single-quoted default query string.

Parameters:
-----------
query : str
Elasticsearch JSON query.

Returns:
--------
A json with the results.

"""

self.host_api = "https://scicrunch.org/api/1/elastic/SPARC_Algolia_pr/_search"

list_url = self.host_api + "?" + "key=" + self.scicrunch_api_key

list_results = self.postURL(list_url, body = query, headers=self.default_headers)
[reviewdog 🐶 blackfmt, formatters] Suggested body=query (no spaces around = in the keyword argument).
return list_results
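
For reference, list_datasets and search_datasets can also be exercised on MetadataService directly, without going through SparcClient. An illustrative sketch only: the import path follows src/sparc/client/services/metadata.py, the config dict keys mirror config/config.ini, the API key is a placeholder, and the hits/hits/_id fields are standard Elasticsearch response fields rather than something guaranteed by this PR.

# Direct use of MetadataService; mirrors the calls made via SparcClient in
# docs/examples_metadata.py. The API key below is a placeholder.
from sparc.client.services.metadata import MetadataService

service = MetadataService(
    config={"scicrunch_api_key": "YOUR_KEY_HERE", "pennsieve_profile_name": "prod"},
    connect=False,
)

# Paginated listing: 5 datasets starting at offset 10.
page = service.list_datasets(limit=5, offset=10)
print(page["hits"]["total"])

# Elasticsearch query, passed either as a JSON string or as a dict (postURL accepts both).
query = {"query": {"terms": {"_id": ["136", "95"]}}}
results = service.search_datasets(query)
for hit in results["hits"]["hits"]:
    print(hit["_id"])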