Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add integration tests for all selection strategies #3851

Merged
merged 8 commits into from
Dec 3, 2024
Merged
43 changes: 19 additions & 24 deletions cumulusci/tasks/bulkdata/select_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,6 @@ def annoy_post_process(
insertion_candidates = load_shaped_records
return selected_records, insertion_candidates

query_records = replace_empty_strings_with_missing(query_records)
select_shaped_records = replace_empty_strings_with_missing(select_shaped_records)

hash_features = 100
num_trees = 10

Expand Down Expand Up @@ -589,7 +586,7 @@ def add_limit_offset_to_user_filter(
return f" {filter_clause}"


def determine_field_types(df, weights):
def determine_field_types(df_db, df_query, weights):
numerical_features = []
boolean_features = []
categorical_features = []
Expand All @@ -598,23 +595,35 @@ def determine_field_types(df, weights):
boolean_weights = []
categorical_weights = []

for col, weight in zip(df.columns, weights):
for col, weight in zip(df_db.columns, weights):
# Check if the column can be converted to numeric
try:
# Attempt to convert to numeric
df[col] = pd.to_numeric(df[col], errors="raise")
temp_df_db = pd.to_numeric(df_db[col], errors="raise")
temp_df_query = pd.to_numeric(df_query[col], errors="raise")
# Replace empty values with 0 for numerical features
df_db[col] = temp_df_db.fillna(0).replace("", 0)
df_query[col] = temp_df_query.fillna(0).replace("", 0)
numerical_features.append(col)
numerical_weights.append(weight)
except ValueError:
# Check for boolean values
if df[col].str.lower().isin(["true", "false"]).all():
if (
df_db[col].str.lower().isin(["true", "false"]).all()
and df_query[col].str.lower().isin(["true", "false"]).all()
):
# Map to actual boolean values
df[col] = df[col].str.lower().map({"true": True, "false": False})
df_db[col] = df_db[col].str.lower().map({"true": True, "false": False})
df_query[col] = (
df_query[col].str.lower().map({"true": True, "false": False})
)
boolean_features.append(col)
boolean_weights.append(weight)
else:
categorical_features.append(col)
categorical_weights.append(weight)
# Replace empty values with 'missing' for categorical features
df_db[col] = df_db[col].replace("", "missing")
df_query[col] = df_query[col].replace("", "missing")

return (
numerical_features,
Expand All @@ -640,14 +649,7 @@ def vectorize_records(db_records, query_records, hash_features, weights):
numerical_weights,
boolean_weights,
categorical_weights,
) = determine_field_types(df_db, weights)

# Modify query dataframe boolean columns to True or False
for col in df_query.columns:
if df_query[col].str.lower().isin(["true", "false"]).all():
df_query[col] = (
df_query[col].str.lower().map({"true": True, "false": False})
)
) = determine_field_types(df_db, df_query, weights)

# Fit StandardScaler on the numerical features of the database records
scaler = StandardScaler()
Expand Down Expand Up @@ -705,13 +707,6 @@ def vectorize_records(db_records, query_records, hash_features, weights):
return final_db_vectors, final_query_vectors


def replace_empty_strings_with_missing(records):
    """Return a copy of *records* with empty-string fields set to "missing".

    Each record is iterated field by field; any field equal to ``""`` is
    replaced by the literal string ``"missing"`` and every other value is
    kept as-is. The input is not modified: a new list of new lists is
    returned, regardless of the sequence type of the incoming records.
    """
    cleaned_records = []
    for row in records:
        cleaned_row = []
        for value in row:
            if value != "":
                cleaned_row.append(value)
            else:
                # Placeholder token used in place of a blank field.
                cleaned_row.append("missing")
        cleaned_records.append(cleaned_row)
    return cleaned_records


def split_and_filter_fields(fields: T.List[str]) -> T.Tuple[T.List[str], T.List[str]]:
# List to store non-lookup fields (load fields)
load_fields = []
Expand Down
4 changes: 4 additions & 0 deletions cumulusci/tasks/bulkdata/step.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,9 +478,11 @@ def select_records(self, records):
)

# Execute the main select query using Bulk API
self.logger.info("Retrieving records from org...")
select_query_records = self._execute_select_query(
select_query=select_query, query_fields=query_fields
)
self.logger.info(f"Retrieved {len(select_query_records)} from org")

query_records.extend(select_query_records)
# Post-process the query results
Expand Down Expand Up @@ -895,7 +897,9 @@ def select_records(self, records):
)

# Execute the query and gather the records
self.logger.info("Retrieving records from org...")
query_records = self._execute_soql_query(select_query, query_fields)
self.logger.info(f"Retrieved {len(query_records)} from org")

# Post-process the query results for this batch
(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
version: 1
interactions:
- &id001
include_file: GET_sobjects_Global_describe.yaml
- &id002
include_file: GET_sobjects_Account_describe.yaml
- *id001
- *id002
- *id002

- &id003
include_file: GET_sobjects_Contact_describe.yaml
- *id001
- *id003
- *id003
- &id007
include_file: GET_sobjects_Opportunity_describe.yaml
- *id002
- &id008
include_file: GET_sobjects_Lead_describe.yaml # Added interaction for Lead
- *id001
- &id009
include_file: GET_sobjects_Event_describe.yaml # Added interaction for Event
- *id001
- *id008
- *id001
- *id009
- *id001

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/limits/recordCount?sObjects=Account
body: null
headers: &id004
Request-Headers:
- Elided
response:
status:
code: 200
message: OK
headers: &id006
Content-Type:
- application/json;charset=UTF-8
Others: Elided
body:
string: "{\n \"sObjects\" : [ {\n \"count\" : 3,\n \"name\" : \"Account\"\n
\ } ]\n}"

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'
body: null
headers: *id004
response:
status:
code: 200
message: OK
headers: *id006
body:
string: "{\n \"totalSize\" : 10,\n \"done\" : true,\n \"records\" : [ {\n
\ \"attributes\" : {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMDQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMDQA3\",\n \"Name\" : \"Tom Cruise\",\n
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMEQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMEQA3\",\n \"Name\" : \"Bob The Builder\",\n
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMFQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMFQA3\",\n \"Name\" : \"Shah Rukh Khan\",\n
\ \"Description\" : \"Bollywood actor\",\n \"Phone\" : \"12345612\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMGQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMGQA3\",\n \"Name\" : \"Aamir Khan\",\n
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMHQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMHQA3\",\n \"Name\" : \"Salman Khan\",\n
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1UzyQAF\"\n
\ },\n \"Id\" : \"0019H00000H1UzyQAF\",\n \"Name\" : \"Tom Cruise\",\n
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1UzzQAF\"\n
\ },\n \"Id\" : \"0019H00000H1UzzQAF\",\n \"Name\" : \"Bob The Builder\",\n
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V00QAF\"\n
\ },\n \"Id\" : \"0019H00000H1V00QAF\",\n \"Name\" : \"Shah Rukh Khan\",\n
\ \"Description\" : \"Bollywood actor\",\n \"Phone\" : \"12345612\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V01QAF\"\n
\ },\n \"Id\" : \"0019H00000H1V01QAF\",\n \"Name\" : \"Aamir Khan\",\n
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V02QAF\"\n
\ },\n \"Id\" : \"0019H00000H1V02QAF\",\n \"Name\" : \"Salman Khan\",\n
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
: \"12345623\",\n \"AccountNumber\" : \"123\"\n } ]\n}"




- request:
method: POST
uri: https://orgname.my.salesforce.com/services/data/v62.0/composite/sobjects
body: '{"allOrNone": false, "records": [{"LastName": "Contact of Tom Cruise",
"AccountId": "0019H00000H1RMDQA3", "attributes": {"type": "Contact"}}, {"LastName":
"Contact of Bob the Builder", "AccountId": "0019H00000H1RMDQA3", "attributes":
{"type": "Contact"}}, {"LastName": "Contact of SRK", "AccountId": "0019H00000H1RMDQA3",
"attributes": {"type": "Contact"}}]}'
headers: *id004
response:
status:
code: 200
message: OK
headers: *id006
body:
string: "[ {\n \"id\" : \"0039H00000BbbFBQAZ\",\n \"success\" : true,\n \"errors\"
: [ ]\n}, {\n \"id\" : \"0039H00000BbbFCQAZ\",\n \"success\" : true,\n \"errors\"
: [ ]\n}, {\n \"id\" : \"0039H00000BbbFDQAZ\",\n \"success\" : true,\n \"errors\"
: [ ]\n} ]"


- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'%20LIMIT%205
body: null
headers: *id004
response:
status:
code: 200
message: OK
headers: *id006
body:
string: "{\n \"totalSize\" : 5,\n \"done\" : true,\n \"records\" : [ {\n
\ \"attributes\" : {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMDQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n }, {\n \"attributes\" :
{\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMEQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n }, {\n \"attributes\" :
{\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMFQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n }, {\n \"attributes\" :
{\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMGQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n }, {\n \"attributes\" :
{\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMHQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMDQA3\"\n } ]\n}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
version: 1
interactions:
- &id001
include_file: GET_sobjects_Global_describe.yaml
- &id002
include_file: GET_sobjects_Account_describe.yaml
- *id001
- *id002
- *id002

- &id003
include_file: GET_sobjects_Contact_describe.yaml
- *id001
- *id003
- *id003
- &id007
include_file: GET_sobjects_Opportunity_describe.yaml
- *id002
- &id008
include_file: GET_sobjects_Lead_describe.yaml # Added interaction for Lead
- *id001
- &id009
include_file: GET_sobjects_Event_describe.yaml # Added interaction for Event
- *id001
- *id008
- *id001
- *id009
- *id001

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/limits/recordCount?sObjects=Account
body: null
headers: &id004
Request-Headers:
- Elided
response:
status:
code: 200
message: OK
headers: &id006
Content-Type:
- application/json;charset=UTF-8
Others: Elided
body:
string: "{\n \"sObjects\" : [ {\n \"count\" : 3,\n \"name\" : \"Account\"\n
\ } ]\n}"

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'
body: null
headers: *id004
response:
status:
code: 200
message: OK
headers: *id006
body:
string: "{\n \"totalSize\" : 10,\n \"done\" : true,\n \"records\" : [ {\n
\ \"attributes\" : {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMDQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMDQA3\",\n \"Name\" : \"Tom Cruise\",\n
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMEQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMEQA3\",\n \"Name\" : \"Bob The Builder\",\n
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMFQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMFQA3\",\n \"Name\" : \"Shah Rukh Khan\",\n
\ \"Description\" : \"Bollywood actor\",\n \"Phone\" : \"12345612\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMGQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMGQA3\",\n \"Name\" : \"Aamir Khan\",\n
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1RMHQA3\"\n
\ },\n \"Id\" : \"0019H00000H1RMHQA3\",\n \"Name\" : \"Salman Khan\",\n
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1UzyQAF\"\n
\ },\n \"Id\" : \"0019H00000H1UzyQAF\",\n \"Name\" : \"Tom Cruise\",\n
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1UzzQAF\"\n
\ },\n \"Id\" : \"0019H00000H1UzzQAF\",\n \"Name\" : \"Bob The Builder\",\n
\ \"Description\" : \"Some Description\",\n \"Phone\" : \"12345632\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V00QAF\"\n
\ },\n \"Id\" : \"0019H00000H1V00QAF\",\n \"Name\" : \"Shah Rukh Khan\",\n
\ \"Description\" : \"Bollywood actor\",\n \"Phone\" : \"12345612\",\n
\ \"AccountNumber\" : \"123\"\n }, {\n \"attributes\" : {\n \"type\"
: \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V01QAF\"\n
\ },\n \"Id\" : \"0019H00000H1V01QAF\",\n \"Name\" : \"Aamir Khan\",\n
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
: \"12345623\",\n \"AccountNumber\" : \"123\"\n }, {\n \"attributes\"
: {\n \"type\" : \"Account\",\n \"url\" : \"/services/data/v62.0/sobjects/Account/0019H00000H1V02QAF\"\n
\ },\n \"Id\" : \"0019H00000H1V02QAF\",\n \"Name\" : \"Salman Khan\",\n
\ \"Description\" : \"Mr perfectionist, bollywood actor\",\n \"Phone\"
: \"12345623\",\n \"AccountNumber\" : \"123\"\n } ]\n}"




- request:
method: POST
uri: https://orgname.my.salesforce.com/services/data/v62.0/composite/sobjects
body: '{"allOrNone": false, "records": [{"LastName": "Contact of Tom Cruise",
"AccountId": "0019H00000H1RMDQA3", "attributes": {"type": "Contact"}}, {"LastName":
"Contact of Bob the Builder", "AccountId": "0019H00000H1RMEQA3", "attributes":
{"type": "Contact"}}, {"LastName": "Contact of SRK", "AccountId": "0019H00000H1RMFQA3",
"attributes": {"type": "Contact"}}]}'
headers: *id004
response:
status:
code: 200
message: OK
headers: *id006
body:
string: "[ {\n \"id\" : \"0039H00000BbbFBQAZ\",\n \"success\" : true,\n \"errors\"
: [ ]\n}, {\n \"id\" : \"0039H00000BbbFCQAZ\",\n \"success\" : true,\n \"errors\"
: [ ]\n}, {\n \"id\" : \"0039H00000BbbFDQAZ\",\n \"success\" : true,\n \"errors\"
: [ ]\n} ]"
Loading
Loading