-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
90% - Prevent null values being added to ExtendedGraph #82
Changes from 1 commit
1b8edf8
ec3fb44
5c89879
ac2071e
54fbb4f
cdb99be
3a963e4
a8cfc26
6a96dcb
460f258
5396f60
60d0e3c
a895200
c8ef3f6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -129,7 +129,11 @@ private function add_tarray_to_index($tarray) | |
if($key[0] != '_') | ||
{ | ||
$predicate = $this->qname_to_uri($key); | ||
$predObjects[$predicate] = $this->toGraphValueObject($value); | ||
$graphValueObject = $this->toGraphValueObject($value); | ||
// Only add if valid values have been found | ||
if($graphValueObject !== false) { | ||
$predObjects[$predicate] = $graphValueObject; | ||
} | ||
} | ||
} | ||
$_i[$this->_labeller->qname_to_alias($tarray["_id"][_ID_RESOURCE])] = $predObjects; | ||
|
@@ -138,39 +142,56 @@ private function add_tarray_to_index($tarray) | |
|
||
/** | ||
* Convert from Tripod value object format (comapct) to ExtendedGraph format (verbose) | ||
* | ||
* @param array $mongoValueObject | ||
* @return array | ||
* @return array| false an array of values or false if the value is not valid | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
*/ | ||
private function toGraphValueObject($mongoValueObject) | ||
{ | ||
$simpleGraphValueObject = null; | ||
$simpleGraphValueObject = array(); | ||
|
||
if (array_key_exists(VALUE_LITERAL,$mongoValueObject)) | ||
{ | ||
// single value literal | ||
$simpleGraphValueObject[] = array( | ||
'type'=>'literal', | ||
'value'=>$mongoValueObject[VALUE_LITERAL]); | ||
// only allow valid values | ||
if($this->isValueValid($mongoValueObject[VALUE_LITERAL])){ | ||
// single value literal | ||
$simpleGraphValueObject[] = array( | ||
'type'=>'literal', | ||
'value'=>$mongoValueObject[VALUE_LITERAL]); | ||
} | ||
} | ||
else if (array_key_exists(VALUE_URI,$mongoValueObject)) | ||
{ | ||
// single value literal | ||
$simpleGraphValueObject[] = array( | ||
'type'=>'uri', | ||
'value'=>$this->_labeller->qname_to_alias($mongoValueObject[VALUE_URI])); | ||
// only allow valid values | ||
if($this->isValueValid($mongoValueObject[VALUE_URI])) { | ||
// single value uri | ||
$simpleGraphValueObject[] = array( | ||
'type' => 'uri', | ||
'value' => $this->_labeller->qname_to_alias($mongoValueObject[VALUE_URI])); | ||
} | ||
} | ||
else | ||
{ | ||
// If we have an array of values | ||
foreach ($mongoValueObject as $kvp) | ||
{ | ||
foreach ($kvp as $type=>$value) | ||
{ | ||
// Only add valid values | ||
if(!$this->isValueValid($value)){ | ||
continue; | ||
} | ||
$simpleGraphValueObject[] = array( | ||
'type'=>($type==VALUE_LITERAL) ? 'literal' : 'uri', | ||
'value'=>($type==VALUE_URI) ? $this->_labeller->qname_to_alias($value) : $value); | ||
} | ||
} | ||
} | ||
// If we don't have any values, then respond with false | ||
if(empty($simpleGraphValueObject)){ | ||
return false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we returning false here? Instead why not return empty array (as before) and have the calling code do the empty()? Returning false to indicate no values out of this method seems very php4. We've unfortunately inherited methods like that from ExtendedGraph (which came from another library) but we don't have to perpetuate the behaviour. |
||
} | ||
// Otherwise we have found valid values | ||
return $simpleGraphValueObject; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
change name to
isValidLiteralValue()
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't we want to check resource values as well though?
isValidTripleValue
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From Tripod's perspective, don't we just need to ensure that subject, predicate, and object are all strings?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
non-empty strings
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree, but enforcing strings as opposed to null could be very painful for any graphs which are already storing non-string values (eg. integers). This current PR to ignore null values could well cause enough headaches without taking out integers at the same time. I do think we should ultimately enforce strings-only, but maybe one step at a time, once we are comfortable with no nulls.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or arrays of strings and integers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess what I'm saying is that we should be doing type checking here so we're not just allowing some other bad data that is equally as incompatible as a null value.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How do we deal with legacy data, though? We have no idea what crazy things have ended up in the graph. The php data types are:
Integer, floats and booleans could all have ended up in the graph. Is it possible to add an array via the public ExtendedGraph methods?
Agree that really we should be ignoring anything that isn't a string, but I guess the question is how much we are prepared to break anything currently using tripod to enforce this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tripod only supports plain literals. Full-stop. If other data types are in the documents they would produce invalid RDF, anyway, because we do not support typed literals at all.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Discussed this with @rsinger Agreed to continue with the current approach. I have just tested to see if add_to_literal can accept arrays, and it can, so we can't simply ignore any non-scalar.