-
-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add XXXX humanization #88
base: master
Are you sure you want to change the base?
Changes from all commits
72e9cc4
5ffc831
6199d54
80bee4e
57252e1
35ee9fc
ee0bc9b
95e812b
b5ca5fc
35ad309
7d40066
bf5dbb4
bd86be4
2daa0a3
0e58e88
234ebb4
d569714
0d6a4e6
edb98e4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -215,10 +215,7 @@ private function humanizeDateWithoutUncertainty( ExtDate $date ): string { | |
$day = $date->getDay(); | ||
|
||
if ( $year !== null ) { | ||
$year = $this->humanizeYear( | ||
$year, | ||
$date->getUnspecifiedDigit() | ||
); | ||
$year = $this->humanizeYear( $year, $date ); | ||
} | ||
|
||
if ( $month !== null ) { | ||
|
@@ -247,7 +244,55 @@ private function humanizeYearMonthDay( ?string $year, ?string $month, ?string $d | |
); | ||
} | ||
|
||
private function humanizeYear( int $year, UnspecifiedDigit $unspecifiedDigit ): string { | ||
private function scaleToMessageKey( int $scale ): string { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yay for putting this in its own method! |
||
switch( $scale ) { | ||
case 1 : return 'edtf-year'; // X | ||
case 2 : return 'edtf-decade'; // XX | ||
case 3 : return 'edtf-century'; // XXX | ||
case 4 : return 'edtf-millennium'; // XXXX | ||
case 5 : return 'edtf-decem-millennium'; // XXXXX | ||
case 6 : return 'edtf-hundreds-of-thousands'; // XXXXXX | ||
case 7 : return 'edtf-million'; // XXXXXXX | ||
case 8 : return 'edtf-tens-of-millions'; // XXXXXXXX | ||
case 9 : return 'edtf-hundreds-of-millions'; // XXXXXXXXX | ||
case 10 : return 'edtf-billion'; // XXXXXXXXXX | ||
case 11 : return 'edtf-tens-of-billions'; // XXXXXXXXXXX | ||
case 12 : return 'edtf-hundreds-of-billions'; // XXXXXXXXXXXX | ||
case 13 : return 'edtf-trillion'; // XXXXXXXXXXXXX | ||
} | ||
|
||
// FIXME: reuse recursively the scale with trillions | ||
// e.g. tens-of-trillions etc., | ||
return 'edtf-tens-of-trillions'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's first make sure we actually need this. I am dubious that these are valid EDTF dates. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure, the only sensible use-case is in cosmology There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Actually, they are required only for testing purposes, e.g. by a user who wonders what the output is when entering something like 56XXXXXXXXXXXXXXXXXXX |
||
} | ||
|
||
private function humanizeYear( int $year, ExtDate $date ): string { | ||
$unspecifiedYearScale = $date->getUnspecifiedYearScale(); | ||
$unspecifiedDigit = $date->getUnspecifiedDigit(); | ||
$specifiedYears = $date->getSpecifiedYears(); | ||
|
||
if ( $unspecifiedYearScale === 0 || | ||
( $this->needsYearEndingChar( $unspecifiedDigit ) && $specifiedYears !== 0 ) ) { | ||
return $this->humanizeYearSpecified( $year, $unspecifiedDigit ); | ||
} | ||
|
||
$specifiedYearsStr = (string)abs( $specifiedYears ); | ||
|
||
$ret = ( $specifiedYears === 0 && $unspecifiedYearScale != 0 ? $this->message( "edtf-date-unspecified" ) | ||
: $specifiedYearsStr ); | ||
|
||
if ( $unspecifiedYearScale > 0 ) { | ||
$ret .= " " . $this->message( $this->scaleToMessageKey( $unspecifiedYearScale ), $specifiedYearsStr ); | ||
} | ||
|
||
if ( $date->isBC() ) { | ||
$ret .= " " . $this->message( "edtf-date-BC" ); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Concatenating messages like this bakes in an assumption about the order of these messages, which might not hold in all languages. That is why for instance we have messages such as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, I guessed something like that, so that should be taken into account and enhanced |
||
|
||
return $ret; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
|
||
private function humanizeYearSpecified( int $year, UnspecifiedDigit $unspecifiedDigit ): string { | ||
$yearStr = (string)abs( $year ); | ||
|
||
if ( $year <= -1000 ) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,26 @@ public function humanizationProvider(): Generator { | |
|
||
yield 'Month only' => [ 'XXXX-12-XX', 'December' ]; | ||
yield 'Day only' => [ 'XXXX-XX-12', '12th' ]; | ||
|
||
// https://github.com/ProfessionalWiki/EDTF/issues/80 | ||
yield 'Some year (4 digits)' => [ 'XXXX', 'some millennium' ]; | ||
|
||
yield 'Some year (1 digit)' => [ 'X', 'some year' ]; | ||
yield 'Some year (2 digit)' => [ 'XX', 'some decade' ]; | ||
yield 'Some year (3 digits)' => [ 'XXX', 'some century' ]; | ||
yield 'Scales (4 digits minus)' => [ '-5XXXX', '5 decem millennia BC' ]; | ||
yield 'Scales (5 digits)' => [ '5XXXXX', '5 hundreds of thousands' ]; | ||
yield 'Scales (6 digits)' => [ '5XXXXXX', '5 millions' ]; | ||
yield 'Scales (7 digits)' => [ '5XXXXXXX', '5 tens of millions' ]; | ||
yield 'Scales (8 digits)' => [ '5XXXXXXXX', '5 hundreds of millions' ]; | ||
yield 'Scales (9 digits)' => [ '5XXXXXXXXX', '5 billions' ]; | ||
yield 'Scales (10 digits)' => [ '5XXXXXXXXXX', '5 tens of billions' ]; | ||
yield 'Scales (11 digits)' => [ '5XXXXXXXXXXX', '5 hundreds of billions' ]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "5 decem millennia"? What does that mean? "5 hundreds of thousands" is just an amount; should it not also have "years" in there somewhere? Is this even a valid EDTF date to begin with? @mzeinstra There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, sometimes the word "year" is missing; I still have to add it where necessary. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
"Decem millennium" theoretically means "ten thousand years"; the expression "tens of thousands" could also be used. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "Tens of thousands" is indeed better, but we can also leave that up to the translators. Cosmology seems to me a valid boundary for the amounts of years that we humanise. Maybe a fallback to just presenting the years in numbers? The most years I can find in the sciences is the half-life of Xenon, which is 18 billion trillion years :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should not forget the objective here, which is to make these EDTF strings human-readable. If 99% of the use cases can be captured, then that is OK with me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously I also posted this link https://asistdl.onlinelibrary.wiley.com/doi/epdf/10.1002/pra2.552 in the element.io SemanticMediawiki group because somebody asked about the support for "geological and historical eras". Regarding the following
In my opinion, if the user registers a date with unspecified digits and the library outputs a human-readable format, and additionally computes comparisons and intervals correctly, that's really useful. Also in my opinion, the approach of presenting a year with mixed unspecified digits, using numbers for the specified digits and literals for the unspecified (scaled) part, seems formally rigorous as well. So, using the example above, -5XXXXXXX is, to be precise, not "50 millions years BC" but "5 tens of millions years BC" (in the output the number represents the specified digits, and the literal part the unspecified digits). So what about, when this is more refined, publishing a demo (which could query the library using Ajax) and proposing this approach to the authors of the format, which as far as I know is the Library of Congress in Washington? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AFAIK there is no standard for the humanisation of EDTF; humanised examples are used on the Library of Congress website, but they are not the owner of the standard. So your best efforts here will de facto help set the humanisation standard. @mnhn-paul you might be able to provide us with a standard for humanising eras in EDTF. E.g. what would the date -5XXXXXXX be humanised to? Paul is from our museum of natural history and might have more insight into this. |
||
|
||
// TODO throw error | ||
// yield 'Scales (throw error 1)' => [ 'XXXXXXXXXX4', '' ]; | ||
// yield 'Scales (throw error 1)' => [ 'XXXXXX4XXXX', '' ]; | ||
|
||
yield 'Month and day' => [ 'XXXX-12-11', 'December 11th' ]; | ||
yield 'Year and day' => [ '2020-XX-11', '11th of unknown month, 2020' ]; | ||
yield 'Unspecified year decade' => [ '197X', '1970s' ]; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can apply https://www.entropywins.wtf/blog/2019/01/14/readable-functions-guard-clause/
$ret
is not a good var name