Fuzzy-dates: EBNF for fuzzy-dates

2 hours ago 2
(* Fuzzy Date Syntax in EBNF — Extended Version with Time, Timezone, Notes, Multiple Ranges, Parenthesized Uncertainty, Century Support, Open-ended Ranges, Dashed Temporal Qualifiers, Decades ('s' qualifier), Historical Style Notes (os/ns), Explicit Calendar System Definitions, Week/Quarter/Half-Year Numbers, Ordinal Day-of-Week Expressions, Named Timezones, Partial Dates, Time-Only Expressions, Probability Distributions, Nested Uncertainty, and Geo-Temporal Qualifiers *)
(* Entry point: a single temporal expression, or several alternatives
   separated by '|'. *)
fuzzy_date_expression =
    temporal_expression_unit , { "|" , temporal_expression_unit } ;

(* Every kind of expression that may stand as one '|'-separated alternative. *)
temporal_expression_unit =
      date_expr
    | time_only_expr
    | century_expr
    | decade_expr
    | numeric_timeline_point
    | day_of_year_expr
    | week_expr
    | quarter_expr
    | half_year_expr
    | ordinal_day_expr
    | range_expr
    | partial_date_expr ;
(* A range is two boundaries joined by "..". Each boundary is a concrete
   expression, the empty boundary (open-ended side), or "?". *)
range_expr =
    temporal_expression_boundary , ".." , temporal_expression_boundary ;

(* Expressions permitted on either side of "..". *)
temporal_expression_boundary =
      date_expr
    | century_expr
    | decade_expr
    | day_of_year_expr
    | week_expr
    | quarter_expr
    | half_year_expr
    | ordinal_day_expr
    | empty_date_boundary
    | "?" ;

(* The empty string: lets a range omit its start or end, e.g. "..2020". *)
empty_date_boundary = "" ;
(* Semantic fuzziness keyword; the rules that use it follow it with "-",
   e.g. "Early-2020". *)
temporal_qualifier = "Early" | "Mid" | "Late" ;

(* Parenthesized uncertainty: symmetric, asymmetric, or a named distribution. *)
uncertainty = "(" , uncertainty_content , ")" ;

uncertainty_content =
      uncertainty_symmetric
    | uncertainty_asymmetric
    | distribution_details ;

(* "±" followed by a magnitude and an optional unit. *)
uncertainty_symmetric = "±" , number , [ uncertainty_unit ] ;

(* "+" magnitude followed by "-" magnitude, each with its own optional unit. *)
uncertainty_asymmetric =
    "+" , number , [ uncertainty_unit ] ,
    "-" , number , [ uncertainty_unit ] ;

(* Units: year, quarter, month, day, hour, minute, second. *)
uncertainty_unit = "y" | "Q" | "m" | "d" | "h" | "min" | "s" ;

(* A distribution name followed by a parenthesized, comma-separated list of
   label=value parameters. *)
distribution_details =
    distribution_name , "(" , distribution_param_list , ")" ;

distribution_name = "normal" | "uniform" | "triangular" ;

distribution_param_list =
    distribution_param , { "," , distribution_param } ;

(* A parameter value is either a number or a date expression. *)
distribution_param = label , "=" , ( number | date_expr ) ;

(* Parameter label: one or more letters, digits or underscores. *)
label = ( letter | digit | "_" ) , { letter | digit | "_" } ;
(* Date expression. In order: optional "~" approximation marker, optional
   dashed temporal qualifier, the date prefix, zero or more uncertainties,
   then optional time, timezone, geo-qualifier, note, style note and
   calendar system. No separator literal stands between the date prefix and
   the time component in this rule. *)
date_expr =
    [ "~" ] ,
    [ temporal_qualifier , "-" ] ,
    date_prefix ,
    { uncertainty } ,
    [ time_component ] ,
    [ timezone_component ] ,
    [ geo_qualifier ] ,
    [ note_component ] ,
    [ style_note ] ,
    [ calendar_system ] ;

(* Date with an unknown leading part: "?-" followed by a month component, or
   "?-?-" followed by a day component. Both forms share the same optional
   trailing components. *)
partial_date_expr =
    "?" , "-" , ( month_component | "?" , "-" , day_component ) ,
    [ time_component ] ,
    [ geo_qualifier ] ,
    [ note_component ] ,
    [ style_note ] ,
    [ calendar_system ] ;
(* Core of a date: either year[-month[-day]] or season-year.
   The day part is nested inside the month part so that a two-component
   value such as "2020-05" has exactly one parse (year-month). With two
   independent optionals it could also be read as year-day. An unknown month
   can still be written explicitly through the "?" alternative of
   month_component. *)
date_prefix =
      year , [ "-" , month_component , [ "-" , day_component ] ]
    | season_name , "-" , year ;

(* Season keywords accepted in the season-year form. *)
season_name = "Spring" | "Summer" | "Autumn" | "Winter" ;
(* Century expression: optional "~", optional dashed temporal qualifier, the
   century number, the literal "C", zero or more uncertainties, then optional
   style note and calendar system. *)
century_expr =
    [ "~" ] ,
    [ temporal_qualifier , "-" ] ,
    century_number , "C" ,
    { uncertainty } ,
    [ style_note ] ,
    [ calendar_system ] ;

(* A plain integer or a bracketed integer choice. *)
century_number = integer_number | temporal_integer_choice ;

(* Decade expression: same shape as a century, with a three-digit decade
   number followed by the literal "s". *)
decade_expr =
    [ "~" ] ,
    [ temporal_qualifier , "-" ] ,
    decade_number , "s" ,
    { uncertainty } ,
    [ style_note ] ,
    [ calendar_system ] ;

(* The first three digits of the decade, e.g. '197' for the 1970s. *)
decade_number = digit , digit , digit ;
(* Day number within a year, e.g. D123-2022: optional "~", the literal "D",
   the day number, "-", the year, zero or more uncertainties, then optional
   note, style note and calendar system. *)
day_of_year_expr =
    [ "~" ] ,
    "D" , day_number_in_year , "-" , year ,
    { uncertainty } ,
    [ note_component ] ,
    [ style_note ] ,
    [ calendar_system ] ;

(* One to three digits. The maximum day number is not checked by the
   grammar; that validation is external. *)
day_number_in_year = digit , [ digit ] , [ digit ] ;
(* Week expression, e.g. W12-2022, ~Early-W01-2023, W12-2022-D-1.
   "W" is followed by exactly two digits, "-" and the year; an optional
   "-D-<n>" suffix selects a day of that week. *)
week_expr =
    [ "~" ] ,
    [ temporal_qualifier , "-" ] ,
    "W" , digit , digit , "-" , year ,
    [ "-" , "D-" , day_of_week_number_periodic ] ,
    { uncertainty } ,
    [ note_component ] ,
    [ style_note ] ,
    [ calendar_system ] ;

(* Quarter expression, e.g. Q2-2022, ~Late-Q4-2024. *)
quarter_expr =
    [ "~" ] ,
    [ temporal_qualifier , "-" ] ,
    "Q" , ( "1" | "2" | "3" | "4" ) , "-" , year ,
    { uncertainty } ,
    [ note_component ] ,
    [ style_note ] ,
    [ calendar_system ] ;

(* Half-year expression, e.g. H1-2022, ~H2-2023. *)
half_year_expr =
    [ "~" ] ,
    [ temporal_qualifier , "-" ] ,
    "H" , ( "1" | "2" ) , "-" , year ,
    { uncertainty } ,
    [ note_component ] ,
    [ style_note ] ,
    [ calendar_system ] ;
(* Ordinal day-of-week expression, e.g. 1º-Mon-2022 or [1º..3º]-Mon-H2-2023:
   optional "~", a single ordinal or a bracketed ordinal range, zero or more
   uncertainties, then optional note, style note and calendar system. *)
ordinal_day_expr =
    [ "~" ] ,
    ( ordinal_day_single | ordinal_day_range ) ,
    { uncertainty } ,
    [ note_component ] ,
    [ style_note ] ,
    [ calendar_system ] ;

(* <ordinal>-<weekday>-<period> *)
ordinal_day_single =
    ordinal_number ,
    "-" , day_of_week_short ,
    "-" , ordinal_temporal_period_reference ;

(* [<ordinal>..<ordinal>]-<weekday>-<period> *)
ordinal_day_range =
    "[" , ordinal_number , ".." , ordinal_number , "]" ,
    "-" , day_of_week_short ,
    "-" , ordinal_temporal_period_reference ;

(* One or more digits followed by "º": 1º, 2º, 3º, 11º, 21º. *)
ordinal_number = digit , { digit } , "º" ;

(* Three-letter weekday names. *)
day_of_week_short =
    "Mon" | "Tue" | "Wed" | "Thu" | "Fri" | "Sat" | "Sun" ;

(* Day number within a periodic structure; 1 = Monday. *)
day_of_week_number_periodic =
    "1" | "2" | "3" | "4" | "5" | "6" | "7" ;

(* The period an ordinal weekday is counted within. *)
ordinal_temporal_period_reference =
      year
    | date_prefix
    | decade_expr
    | century_expr
    | week_expr
    | quarter_expr
    | half_year_expr ;
(* Bare numeric point for cosmic or abstract time lines: optional "~", a
   number, zero or more uncertainties, then an optional note. *)
numeric_timeline_point =
    [ "~" ] ,
    number ,
    { uncertainty } ,
    [ note_component ] ;

(* Time without a date: the literal "T", a time component, then optional
   timezone, geo-qualifier, note, style note and calendar system. *)
time_only_expr =
    "T" , time_component ,
    [ timezone_component ] ,
    [ geo_qualifier ] ,
    [ note_component ] ,
    [ style_note ] ,
    [ calendar_system ] ;
(* Geo-temporal qualifier: "@" followed by a place name or by coordinates. *)
geo_qualifier = "@" , ( location_name | geo_coordinates ) ;

(* A place name must contain at least one character. A zero-or-more
   repetition here would let a bare "@" with nothing after it count as a
   valid qualifier. *)
location_name = character_in_location_name , { character_in_location_name } ;

(* Characters permitted in a place name. *)
character_in_location_name = letter | digit | " " | "/" | "-" | "_" ;

(* "geo:" followed by two comma-separated numbers.
   NOTE(review): presumably latitude then longitude - confirm the order. *)
geo_coordinates = "geo:" , number , "," , number ;
(* One candidate value with a weight attached: <value>*<percentage>. *)
weighted_date_part =
    ( year
    | month_number_literal
    | day_number_literal
    | century_number_literal
    | hour_literal
    | minute_literal
    | second_literal
    ) , "*" , percentage ;

(* Bracketed list of weighted candidates, separated by "-".
   NOTE(review): the other choice lists in this grammar use "|" as their
   separator - confirm that "-" is intended here. *)
weighted_date_part_choice =
    "[" , weighted_date_part , { "-" , weighted_date_part } , "]" ;

(* Bracketed integer alternatives: an inclusive range or a discrete list. *)
temporal_integer_choice =
    "[" , ( integer_range | integer_discrete ) , "]" ;

(* <integer>..<integer> *)
integer_range = integer_number , ".." , integer_number ;

(* One or more integers separated by "|". *)
integer_discrete = integer_number , { "|" , integer_number } ;
(* Two-digit month, 01-12. *)
month_number_literal =
      "0" , ( "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" )
    | "1" , ( "0" | "1" | "2" ) ;

(* Month slot of a date: a literal month, "?" for unknown, or one of the
   bracketed choice forms. *)
month_component =
      month_number_literal
    | "?"
    | weighted_date_part_choice
    | multi_date_choice
    | temporal_integer_choice ;

(* Two-digit day, 01-31. *)
day_number_literal =
      "0" , ( "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" )
    | ( "1" | "2" ) , digit
    | "3" , ( "0" | "1" ) ;

(* Day slot of a date: a literal day, "?" for unknown, or one of the
   bracketed choice forms.
   NOTE(review): day_choice_component has no definition in the visible part
   of this grammar - confirm it is defined elsewhere. *)
day_component =
      day_number_literal
    | "?"
    | day_choice_component
    | weighted_date_part_choice
    | temporal_integer_choice ;
(* Time of day: hour ":" minute, optionally followed by ":" second and
   fractional seconds. *)
time_component =
    hour_component , ":" , minute_component ,
    [ ":" , second_component , [ fractional_seconds ] ] ;

(* Two-digit hour, 00-23. *)
hour_literal =
      ( "0" | "1" ) , digit
    | "2" , ( "0" | "1" | "2" | "3" ) ;

(* Hour slot: a literal hour, "?" for unknown, or a bracketed choice form. *)
hour_component =
    hour_literal | "?" | weighted_date_part_choice | temporal_integer_choice ;

(* Two-digit minute, 00-59: a tens digit 0-5 followed by any digit.
   The earlier form was copied from the hour pattern; it matched only 00-19
   plus three-character strings such as "230", and rejected 20-59. *)
minute_literal = ( "0" | "1" | "2" | "3" | "4" | "5" ) , digit ;

(* Minute slot: a literal minute, "?" for unknown, or a bracketed choice
   form. *)
minute_component =
    minute_literal | "?" | weighted_date_part_choice | temporal_integer_choice ;

(* Two-digit second, 00-59: a tens digit 0-5 followed by any digit
   (same correction as minute_literal). *)
second_literal = ( "0" | "1" | "2" | "3" | "4" | "5" ) , digit ;

(* Second slot: a literal second, "?" for unknown, or a bracketed choice
   form. *)
second_component =
    second_literal | "?" | weighted_date_part_choice | temporal_integer_choice ;

(* "." followed by one or more digits. *)
fractional_seconds = "." , digit , { digit } ;
(* Timezone: "Z", a signed hh:mm offset, a three-letter abbreviation, or a
   bracketed zone id / zone transition. *)
timezone_component =
      "Z"
    | ( "+" | "-" ) , tz_hour , ":" , tz_minute
    | letter , letter , letter
    | "[" , ( timezone_full_id | timezone_transition ) , "]" ;

(* Offset hour, two digits 00-23. Defined here because timezone_component
   references it and no other visible rule defines it. *)
tz_hour =
      ( "0" | "1" ) , digit
    | "2" , ( "0" | "1" | "2" | "3" ) ;

(* Offset minute, two digits 00-59. Defined here for the same reason. *)
tz_minute = ( "0" | "1" | "2" | "3" | "4" | "5" ) , digit ;

(* A zone id must contain at least one character, so "[]" is not a valid
   timezone. *)
timezone_full_id = character_in_timezone_id , { character_in_timezone_id } ;

(* Characters permitted in a zone id. *)
character_in_timezone_id = letter | digit | "_" | "/" | "+" | "-" ;

(* Two zone ids in sequence.
   NOTE(review): the separator literal is the empty string, which makes a
   transition textually indistinguishable from a single zone id. A separator
   character may have been lost - confirm the intended literal. *)
timezone_transition = timezone_full_id , "" , timezone_full_id ;
(* Free-text note: "#" followed by any run of printable characters. *)
note_component = "#" , { character } ;

(* Simplified dating-style marker: Old Style (O.S., st. v.) or New Style
   (N.S., st. n.). *)
style_note = "(os)" | "(ns)" ;

(* Explicit calendar system: a parenthesized calendar identifier. *)
calendar_system = "(" , calendar_id , ")" ;

(* The identifier must contain at least one character, so an empty "()" is
   not accepted as a calendar system.
   NOTE(review): the identifiers "os" and "ns" produce the same text as
   style_note - confirm how that overlap should be resolved. *)
calendar_id = character_in_calendar_id , { character_in_calendar_id } ;

character_in_calendar_id = ? any printable character except ')' and newline ? ;
(* Bracketed list of date alternatives separated by "|". *)
multi_date_choice =
    "[" , date_choice , { "|" , date_choice } , "]" ;

(* Each alternative is a month-day pair or a range of days. *)
date_choice =
      date_pair
    | day_range ;

(* <month>-<day> *)
date_pair = month_number_literal , "-" , day_number_literal ;

(* <day>..<day or "?">, with an optional "~" in front of the upper bound. *)
day_range =
    day_number_literal , ".." , [ "~" ] , day_number_or_wildcard ;

(* Upper bound of a day range: a literal day or "?". *)
day_number_or_wildcard = day_number_literal | "?" ;
(* Year slot: an integer, "?" for unknown, or a bracketed integer choice. *)
year =
      integer_number
    | "?"
    | temporal_integer_choice ;

(* One or more digits. *)
century_number_literal = digit , { digit } ;

(* Optional leading "-" followed by one or more digits. *)
integer_number = [ "-" ] , digit , { digit } ;

(* Either the literal "100%" or exactly two digits followed by "%". *)
percentage =
      "100%"
    | digit , digit , "%" ;

(* Optional leading "-", a digit, then any mix of digits and underscores,
   optionally followed by "." and one or more digits. *)
number =
    [ "-" ] ,
    digit , { digit_or_underscore } ,
    [ "." , digit , { digit } ] ;

digit_or_underscore = digit | "_" ;
(* Decimal digit. *)
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;

(* ASCII letter, upper or lower case. Lower case is included because the
   rules built on letter (label, character_in_location_name,
   character_in_timezone_id) could not otherwise express mixed-case text;
   the upper-case-only form is a strict subset of this one. *)
letter =
      "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M"
    | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
    | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m"
    | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" ;

(* Any printable character other than a newline. *)
character = ? any printable character except newline ? ;
Read Entire Article