Coverage for datesat / constraint_parser.py: 74.1%
701 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-10 23:47 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-10 23:47 +0000
1"""
2Constraint parser for DateSAT system using Lark parser generator.
4This module provides functionality to parse constraints from the structured format
5and convert them into executable DateSATBuilder code using a context-free grammar.
6"""
8import re
9from typing import Any, Dict, List, Union
11from lark import Lark, Transformer
14class ConstraintTransformer(Transformer):
15 """Transformer to convert Lark parse tree to Python code."""
17 def __init__(self, variable_types: Dict[str, str] = None):
18 """
19 Initialize transformer with variable type information.
21 Args:
22 variable_types: Dictionary mapping variable names to types ('date', 'int', 'bool')
23 """
24 super().__init__()
25 self.variable_types = variable_types or {}
27 def constraint(self, items) -> str:
28 """Transform constraint (top level)."""
29 bool_expr = items[0]
31 # Check if bool_expr already has builder.add_constraint wrapper (from implication)
32 if bool_expr.startswith("builder.add_constraint("): 32 ↛ 33line 32 didn't jump to line 33 because the condition on line 32 was never true
33 return bool_expr
35 return f"builder.add_constraint({bool_expr})"
37 def implication(self, items) -> str:
38 """Transform implication A -> B into Implies(A, B)."""
39 # Earley parser might include the -> token, filter it out
40 filtered = [item for item in items if str(item) != "->"]
41 if len(filtered) == 2: 41 ↛ 46line 41 didn't jump to line 46 because the condition on line 41 was always true
42 left = str(filtered[0])
43 right = str(filtered[1])
44 else:
45 # Fallback: use first and last items
46 left = str(items[0])
47 right = str(items[-1])
49 # Strip outermost parentheses from arguments if present
50 # e.g., (a == b) -> (c == d) becomes Implies(a == b, c == d)
51 left = self._strip_outer_parens(left)
52 right = self._strip_outer_parens(right)
54 return f"Implies({left}, {right})"
56 def _strip_outer_parens(self, expr: str) -> str:
57 """Strip outermost parentheses from an expression if they wrap the entire expression."""
58 expr = expr.strip()
59 if expr.startswith("(") and expr.endswith(")"):
60 # Check if these are the outermost matching parentheses
61 depth = 0
62 for i, char in enumerate(expr):
63 if char == "(":
64 depth += 1
65 elif char == ")":
66 depth -= 1
67 # If depth hits 0 before the end, these aren't the outermost parens
68 if depth == 0 and i < len(expr) - 1: 68 ↛ 69line 68 didn't jump to line 69 because the condition on line 68 was never true
69 return expr
70 # If we get here, the parentheses wrap the whole expression
71 return expr[1:-1]
72 return expr
74 def or_op(self, items) -> str:
75 """Transform OR operation A || B into Or(A, B)."""
76 # Earley parser might include the operator token, filter it out
77 filtered = [item for item in items if str(item) not in ("||", "or", "OR")]
78 if len(filtered) == 2: 78 ↛ 83line 78 didn't jump to line 83 because the condition on line 78 was always true
79 left = str(filtered[0])
80 right = str(filtered[1])
81 else:
82 # Fallback: use first and last items
83 left = str(items[0])
84 right = str(items[-1])
86 # Strip outermost parentheses from arguments
87 left = self._strip_outer_parens(left)
88 right = self._strip_outer_parens(right)
90 return f"Or({left}, {right})"
92 def and_op(self, items) -> str:
93 """Transform AND operation A && B into And(A, B)."""
94 # Earley parser might include the operator token, filter it out
95 filtered = [item for item in items if str(item) not in ("&&", "and", "AND")]
96 if len(filtered) == 2: 96 ↛ 101line 96 didn't jump to line 101 because the condition on line 96 was always true
97 left = str(filtered[0])
98 right = str(filtered[1])
99 else:
100 # Fallback: use first and last items
101 left = str(items[0])
102 right = str(items[-1])
104 # Strip outermost parentheses from arguments
105 left = self._strip_outer_parens(left)
106 right = self._strip_outer_parens(right)
108 return f"And({left}, {right})"
110 def not_op(self, items) -> str:
111 """Transform NOT operation !A / not A into Not(A)."""
112 # items = [NOT token, not_expr] or [not_expr] if NOT token is stripped
113 filtered = [item for item in items if str(item) not in ("!", "not", "NOT")]
114 if filtered: 114 ↛ 117line 114 didn't jump to line 117 because the condition on line 114 was always true
115 expr = str(filtered[0])
116 else:
117 expr = str(items[-1])
119 # Strip outermost parentheses from argument
120 expr = self._strip_outer_parens(expr)
122 return f"Not({expr})"
124 def eq_bool(self, items) -> str:
125 """Transform boolean equality A == B."""
126 filtered = [item for item in items if str(item) != "=="]
127 if len(filtered) == 2: 127 ↛ 131line 127 didn't jump to line 131 because the condition on line 127 was always true
128 left = str(filtered[0])
129 right = str(filtered[1])
130 return f"{left} == {right}"
131 return f"{items[0]} == {items[-1]}"
133 def ne_bool(self, items) -> str:
134 """Transform boolean inequality A != B."""
135 filtered = [item for item in items if str(item) != "!="]
136 if len(filtered) == 2: 136 ↛ 140line 136 didn't jump to line 140 because the condition on line 136 was always true
137 left = str(filtered[0])
138 right = str(filtered[1])
139 return f"{left} != {right}"
140 return f"{items[0]} != {items[-1]}"
142 def bool_atom(self, items) -> str:
143 """Transform boolean atom (comparison, variable, bool literal, or parenthesized bool expr)."""
144 # Handle parenthesized expressions: LPAR bool_expr RPAR
145 # Always preserve parentheses from input
146 if len(items) == 3 and str(items[0]) == "(" and str(items[2]) == ")": 146 ↛ 148line 146 didn't jump to line 148 because the condition on line 146 was always true
147 return f"({items[1]})"
148 if len(items) == 1:
149 return str(items[0])
150 return " ".join(str(item) for item in items)
152 def date_comparison(self, items) -> str:
153 """Transform date comparison expression."""
154 return self._comparison_helper(items)
156 def int_comparison(self, items) -> str:
157 """Transform int comparison expression."""
158 return self._comparison_helper(items)
160 def _comparison_helper(self, items) -> str:
161 """Helper for transforming comparison expressions (both date and int)."""
162 left, op, right = items
164 # Convert to strings (items could be Tree objects if not fully transformed)
165 left_str = str(left)
166 op_str = str(op)
167 right_str = str(right)
169 # Check if we need to transform parametric Date() comparisons
170 # (Date constructors with variable arguments, e.g., Date(x, 2, 1))
171 transformed = self._transform_parametric_date_comparison(
172 left_str, op_str, right_str
173 )
174 if transformed:
175 return transformed
176 return f"{left_str} {op_str} {right_str}"
178 def bool_literal(self, items) -> str:
179 """Transform boolean literal True/False."""
180 return str(items[0])
182 def _extract_date_components(self, expr: str) -> List[str]:
183 """
184 Extract year, month, day arguments from a Date(...) constructor expression.
186 Args:
187 expr: An expression that may contain Date(year, month, day)
189 Returns:
190 List of [year, month, day] argument strings, or None if not a valid Date()
191 """
192 # Find Date( and match balanced parentheses
193 date_start = expr.find("Date(")
194 if date_start == -1:
195 return None
197 # Find the opening paren after Date
198 start = date_start + 5 # len("Date(")
199 paren_count = 0
200 args = []
201 current_arg = []
202 i = start
204 while i < len(expr): 204 ↛ 225line 204 didn't jump to line 225 because the condition on line 204 was always true
205 char = expr[i]
206 if char == "(": 206 ↛ 207line 206 didn't jump to line 207 because the condition on line 206 was never true
207 paren_count += 1
208 current_arg.append(char)
209 elif char == ")":
210 if paren_count == 0: 210 ↛ 215line 210 didn't jump to line 215 because the condition on line 210 was always true
211 # This is the closing paren for Date()
212 if current_arg: 212 ↛ 214line 212 didn't jump to line 214 because the condition on line 212 was always true
213 args.append("".join(current_arg).strip())
214 break
215 paren_count -= 1
216 current_arg.append(char)
217 elif char == "," and paren_count == 0:
218 # This comma separates arguments
219 args.append("".join(current_arg).strip())
220 current_arg = []
221 else:
222 current_arg.append(char)
223 i += 1
225 if len(args) == 3: 225 ↛ 227line 225 didn't jump to line 227 because the condition on line 225 was always true
226 return args
227 return None
229 @staticmethod
230 def _has_variable(expr: str) -> bool:
231 """
232 Check if expression contains a variable (not just a literal number).
234 Used to determine if a Date() constructor is parametric.
235 """
236 expr = expr.strip()
237 # If it's just a number, it has no variable
238 if re.match(r"^-?\d+$", expr):
239 return False
240 # If it contains identifier characters, it has a variable
241 return bool(re.search(r"[a-zA-Z_][a-zA-Z0-9_]*", expr))
243 def _transform_parametric_date_comparison(
244 self, left: str, op: str, right: str
245 ) -> str:
246 """
247 Transform comparisons involving parametric Date() constructors.
249 A parametric Date() has one or more variable arguments, e.g., Date(x, 2, 1).
250 This is different from a date variable (e.g., `k` where `k: date`).
252 Transforms:
253 k >= Date(x+1, 1, 1) → component-wise comparison on k.year, k.month, k.day
254 k == Date(x, 1, 2) → And(k.year == x, k.month == 1, k.day == 2)
256 Args:
257 left: Left side of comparison (e.g., "k" or "Date(x, 2, 1)")
258 op: Comparison operator (==, !=, >=, <=, >, <)
259 right: Right side of comparison
261 Returns:
262 Transformed constraint string, or None if no transformation needed
263 """
264 import re
266 # Check if right side is a Date() constructor
267 date_components = self._extract_date_components(right)
268 date_var_expr = left.strip()
269 swapped = False
271 if not date_components:
272 # Check if left side is a Date() constructor
273 date_components = self._extract_date_components(left)
274 if date_components:
275 # Swap left and right, invert operator
276 date_var_expr = right.strip()
277 op = self._invert_operator(op)
278 swapped = True
279 else:
280 return None
282 year_expr, month_expr, day_expr = date_components
284 # If all components are concrete (no variables), no transformation needed
285 if not (
286 self._has_variable(year_expr)
287 or self._has_variable(month_expr)
288 or self._has_variable(day_expr)
289 ):
290 return None
292 # Extract the date variable name (handle property access like x.year)
293 date_var_match = re.match(r"^([a-zA-Z_][a-zA-Z0-9_]*)", date_var_expr)
294 if not date_var_match: 294 ↛ 295line 294 didn't jump to line 295 because the condition on line 294 was never true
295 return None
297 date_var = date_var_match.group(1)
299 # Transform based on operator
300 if op == "==": 300 ↛ 303line 300 didn't jump to line 303 because the condition on line 300 was always true
301 # Equality: all components must match
302 return f"And({date_var}.year == {year_expr}, {date_var}.month == {month_expr}, {date_var}.day == {day_expr})"
303 elif op == "!=":
304 # Inequality: at least one component differs
305 return f"Or({date_var}.year != {year_expr}, {date_var}.month != {month_expr}, {date_var}.day != {day_expr})"
306 elif op == ">=":
307 # Greater or equal: lexicographic comparison
308 return f"Or({date_var}.year > {year_expr}, And({date_var}.year == {year_expr}, Or({date_var}.month > {month_expr}, And({date_var}.month == {month_expr}, {date_var}.day >= {day_expr}))))"
309 elif op == "<=":
310 # Less or equal: lexicographic comparison
311 return f"Or({date_var}.year < {year_expr}, And({date_var}.year == {year_expr}, Or({date_var}.month < {month_expr}, And({date_var}.month == {month_expr}, {date_var}.day <= {day_expr}))))"
312 elif op == ">":
313 # Greater: lexicographic comparison (year > OR (year == AND month >) OR (year == AND month == AND day >))
314 return f"Or({date_var}.year > {year_expr}, And({date_var}.year == {year_expr}, Or({date_var}.month > {month_expr}, And({date_var}.month == {month_expr}, {date_var}.day > {day_expr}))))"
315 elif op == "<":
316 # Less: lexicographic comparison (year < OR (year == AND month <) OR (year == AND month == AND day <))
317 return f"Or({date_var}.year < {year_expr}, And({date_var}.year == {year_expr}, Or({date_var}.month < {month_expr}, And({date_var}.month == {month_expr}, {date_var}.day < {day_expr}))))"
319 return None
321 def _invert_operator(self, op: str) -> str:
322 """Invert a comparison operator."""
323 inversions = {
324 ">=": "<=",
325 "<=": ">=",
326 ">": "<",
327 "<": ">",
328 "==": "==",
329 "!=": "!=",
330 }
331 return inversions.get(op, op)
333 # Date expression transformers
334 def date_add_period(self, items) -> str:
335 """Transform date + period operation."""
336 filtered = [item for item in items if str(item) != "+"]
337 if len(filtered) == 2: 337 ↛ 339line 337 didn't jump to line 339 because the condition on line 337 was always true
338 return f"{filtered[0]} + {filtered[1]}"
339 return f"{items[0]} + {items[-1]}"
341 def date_sub_period(self, items) -> str:
342 """Transform date - period operation."""
343 filtered = [item for item in items if str(item) != "-"]
344 if len(filtered) == 2: 344 ↛ 346line 344 didn't jump to line 346 because the condition on line 344 was always true
345 return f"{filtered[0]} - {filtered[1]}"
346 return f"{items[0]} - {items[-1]}"
348 def date_atom(self, items) -> str:
349 """Transform date atom (handle parenthesized expressions)."""
350 # If it's just one item, return it
351 if len(items) == 1: 351 ↛ 352line 351 didn't jump to line 352 because the condition on line 351 was never true
352 return str(items[0])
353 # If it's LPAR expr RPAR, preserve parentheses for nested expressions
354 if len(items) == 3 and str(items[0]) == "(" and str(items[2]) == ")": 354 ↛ 357line 354 didn't jump to line 357 because the condition on line 354 was always true
355 return f"({items[1]})"
356 # Otherwise return all items joined
357 return " ".join(str(item) for item in items)
359 # Period expression transformers
360 def period_add(self, items) -> str:
361 """Transform period + period operation."""
362 filtered = [item for item in items if str(item) != "+"]
363 if len(filtered) == 2:
364 return f"{filtered[0]} + {filtered[1]}"
365 return f"{items[0]} + {items[-1]}"
367 def period_sub(self, items) -> str:
368 """Transform period - period operation."""
369 filtered = [item for item in items if str(item) != "-"]
370 if len(filtered) == 2:
371 return f"{filtered[0]} - {filtered[1]}"
372 return f"{items[0]} - {items[-1]}"
374 def int_mul_period(self, items) -> str:
375 """Transform int * period operation."""
376 filtered = [item for item in items if str(item) != "*"]
377 if len(filtered) == 2:
378 return f"{filtered[0]} * {filtered[1]}"
379 return f"{items[0]} * {items[-1]}"
381 def period_mul_int(self, items) -> str:
382 """Transform period * int operation."""
383 filtered = [item for item in items if str(item) != "*"]
384 if len(filtered) == 2:
385 return f"{filtered[0]} * {filtered[1]}"
386 return f"{items[0]} * {items[-1]}"
388 def period_atom(self, items) -> str:
389 """Transform period atom (handle parenthesized expressions)."""
390 if len(items) == 1:
391 return str(items[0])
392 if len(items) == 3 and str(items[0]) == "(" and str(items[2]) == ")":
393 return f"({items[1]})"
394 return " ".join(str(item) for item in items)
396 # Integer expression transformers
397 def int_add(self, items) -> str:
398 """Transform integer addition operation."""
399 filtered = [item for item in items if str(item) != "+"]
400 if len(filtered) == 2: 400 ↛ 402line 400 didn't jump to line 402 because the condition on line 400 was always true
401 return f"{filtered[0]} + {filtered[1]}"
402 return f"{items[0]} + {items[-1]}"
404 def int_sub(self, items) -> str:
405 """Transform integer subtraction operation."""
406 filtered = [item for item in items if str(item) != "-"]
407 if len(filtered) == 2: 407 ↛ 409line 407 didn't jump to line 409 because the condition on line 407 was always true
408 return f"{filtered[0]} - {filtered[1]}"
409 return f"{items[0]} - {items[-1]}"
411 def int_mul(self, items) -> str:
412 """Transform integer multiplication (linear only: const * var or var * const)."""
413 filtered = [item for item in items if str(item) != "*"]
414 if len(filtered) == 2: 414 ↛ 418line 414 didn't jump to line 418 because the condition on line 414 was always true
415 left = str(filtered[0])
416 right = str(filtered[1])
417 else:
418 left = str(items[0])
419 right = str(items[-1])
421 # Validate linearity: one side must be a constant
422 left_is_const = self._is_constant(left)
423 right_is_const = self._is_constant(right)
425 if not (left_is_const or right_is_const): 425 ↛ 426line 425 didn't jump to line 426 because the condition on line 425 was never true
426 raise ValueError(
427 f"Nonlinear arithmetic not allowed: '{left} * {right}'. "
428 f"Multiplication must be between a constant and a variable (e.g., '5 * x' or 'x * 5')."
429 )
431 return f"{left} * {right}"
433 def int_neg(self, items) -> str:
434 """Transform integer negation operation."""
435 filtered = [item for item in items if str(item) != "-"]
436 if filtered: 436 ↛ 438line 436 didn't jump to line 438 because the condition on line 436 was always true
437 return f"-{filtered[0]}"
438 return f"-{items[-1]}"
440 def int_atom(self, items) -> str:
441 """Transform int atom (handle parenthesized expressions). Always preserve parentheses."""
442 if len(items) == 1:
443 return str(items[0])
444 if len(items) == 3 and str(items[0]) == "(" and str(items[2]) == ")":
445 return f"({items[1]})"
446 return " ".join(str(item) for item in items)
448 @staticmethod
449 def _is_constant(expr: str) -> bool:
450 """
451 Check if an expression is a constant (signed number).
453 Args:
454 expr: Expression string to check
456 Returns:
457 True if the expression is a constant number, False otherwise
458 """
459 expr = expr.strip()
460 # Remove parentheses if present
461 if expr.startswith("(") and expr.endswith(")"): 461 ↛ 462line 461 didn't jump to line 462 because the condition on line 461 was never true
462 expr = expr[1:-1].strip()
463 # Check if it's a signed number
464 return bool(re.match(r"^-?\d+$", expr))
466 # Variable transformer (unified, type checking happens at usage)
467 def variable(self, items) -> str:
468 """Transform variable reference."""
469 return str(items[0])
471 def _check_variable_type(
472 self, var_name: str, expected_type: str, context: str
473 ) -> None:
474 """
475 Check if a variable is used in the correct type context.
477 Args:
478 var_name: Name of the variable
479 expected_type: Expected type ('date', 'int', or 'bool')
480 context: Context where the variable is used (for error messages)
482 Raises:
483 ValueError: If variable type doesn't match expected type
484 """
485 if var_name not in self.variable_types:
486 # Variable not in type map - will be caught later by undeclared variable check
487 return
489 actual_type = self.variable_types[var_name]
490 if actual_type != expected_type:
491 raise ValueError(
492 f"Type error: Variable '{var_name}' has type '{actual_type}' but is used as '{expected_type}' "
493 f"in context '{context}'. Variables must be used consistently with their declared type."
494 )
496 # Date field access (property access on dates)
497 def date_field_access(self, items) -> str:
498 """Transform date field access like x.year, x.month, x.day, or Date(...).year."""
499 # items = [date_variable/date_constructor, DOT, date_field]
500 date_expr = items[0]
501 field_name = items[2]
502 return f"{date_expr}.{field_name}"
504 def date_field(self, items) -> str:
505 """Transform date field name (year, month, day)."""
506 return str(items[0])
508 # Constructors
509 def date_constructor(self, items) -> str:
510 """Transform Date constructor (arguments are int_expr)."""
511 # items = [LPAR, year_int_expr, month_int_expr, day_int_expr, RPAR]
512 # Filter out parentheses and commas if present
513 args = [str(item) for item in items if str(item) not in ["(", ")", ","]]
514 if len(args) >= 3: 514 ↛ 517line 514 didn't jump to line 517 because the condition on line 514 was always true
515 return f"Date({args[0]}, {args[1]}, {args[2]})"
516 # Fallback: assume items are [year, month, day]
517 return f"Date({items[0]}, {items[1]}, {items[2]})"
519 def period_constructor(self, items) -> str:
520 """Transform Period constructor (arguments are int_expr)."""
521 # items = [LPAR, years_int_expr, months_int_expr, days_int_expr, RPAR]
522 # Filter out parentheses and commas if present
523 args = [str(item) for item in items if str(item) not in ["(", ")", ","]]
524 if len(args) >= 3: 524 ↛ 527line 524 didn't jump to line 527 because the condition on line 524 was always true
525 return f"Period({args[0]}, {args[1]}, {args[2]})"
526 # Fallback: assume items are [years, months, days]
527 return f"Period({items[0]}, {items[1]}, {items[2]})"
529 def comparison_op(self, items) -> str:
530 """Transform comparison operator."""
531 if not items: 531 ↛ 532line 531 didn't jump to line 532 because the condition on line 531 was never true
532 return ""
533 return str(items[0])
535 def int_const(self, items) -> str:
536 """Transform integer constant."""
537 return str(items[0])
539 # Legacy methods for backward compatibility
540 def number(self, items) -> str:
541 """Transform number literal (legacy)."""
542 return str(items[0])
544 def string(self, items) -> str:
545 """Transform string literal (legacy)."""
546 return str(items[0])
549class ConstraintParser:
550 """Parser for structured constraint format using Lark grammar."""
552 def __init__(self):
553 """Initialize the parser with Lark grammar."""
554 self.variable_types: Dict[str, str] = (
555 {}
556 ) # Maps variable name to type: 'date', 'int', or 'bool'
558 # Define the type-safe grammar for constraint parsing
559 # Type safety is enforced by construction: int_expr, date_expr, period_expr, bool_expr
560 self.grammar = r"""
561 constraint: bool_expr
563 // Boolean expression hierarchy with proper precedence
564 ?bool_expr: implication
566 ?implication: equality
567 | equality IMPLIES implication -> implication
569 ?equality: disjunction
570 | disjunction EQ disjunction -> eq_bool
571 | disjunction NE disjunction -> ne_bool
573 ?disjunction: conjunction
574 | disjunction OR conjunction -> or_op
576 ?conjunction: negation
577 | conjunction AND negation -> and_op
579 ?negation: NOT negation -> not_op
580 | bool_atom
582 ?bool_atom: date_comparison
583 | int_comparison
584 | bool_literal
585 | variable
586 | LPAR bool_expr RPAR
588 // Type-safe comparisons: dates compare with dates, ints compare with ints
589 // Note: Variables can be either type, so both rules can match - Earley parser handles ambiguity
590 // The transformer will resolve based on inferred variable types
591 date_comparison: date_expr comparison_op date_expr
592 int_comparison: int_expr comparison_op int_expr
594 comparison_op: LT | LTE | GT | GTE | EQ | NE
595 LT: "<"
596 LTE: "<="
597 GT: ">"
598 GTE: ">="
599 EQ: "=="
600 NE: "!="
602 // Date expressions: dates can be combined with periods
603 ?date_expr: date_expr PLUS period_expr -> date_add_period
604 | date_expr MINUS period_expr -> date_sub_period
605 | date_atom
607 ?date_atom: date_constructor
608 | variable
609 | LPAR date_expr RPAR
611 date_constructor: "Date" LPAR int_expr "," int_expr "," int_expr RPAR
613 // Period expressions: periods can be combined with periods or scaled by concrete integers only
614 ?period_expr: period_expr PLUS period_expr -> period_add
615 | period_expr MINUS period_expr -> period_sub
616 | int_const STAR period_expr -> int_mul_period
617 | period_expr STAR int_const -> period_mul_int
618 | period_atom
620 ?period_atom: period_constructor
621 | LPAR period_expr RPAR
623 period_constructor: "Period" LPAR int_const "," int_const "," int_const RPAR
625 // Integer expressions: linear arithmetic
626 ?int_expr: int_expr PLUS int_term -> int_add
627 | int_expr MINUS int_term -> int_sub
628 | int_term
630 ?int_term: int_const STAR int_term -> int_mul
631 | int_term STAR int_const -> int_mul
632 | int_factor
634 ?int_factor: MINUS int_factor -> int_neg
635 | int_atom
637 ?int_atom: int_const
638 | variable
639 | date_field_access
640 | LPAR int_expr RPAR
642 int_const: SIGNED_NUMBER
644 // Property access on dates (extracts year, month, or day as integer)
645 // Allows field access on any date expression, e.g., (d + Period(1, 0, 0)).year
646 date_field_access: date_expr DOT date_field
648 date_field: DATE_FIELD
649 DATE_FIELD: "year" | "month" | "day"
651 // Unified variable rule (type checking happens in transformer)
652 variable: CNAME
654 // Boolean literals
655 bool_literal: BOOL_LITERAL
656 BOOL_LITERAL: "True" | "False"
658 // Operators
659 IMPLIES: "->"
660 OR: "||" | /\bor\b/i | /\bOR\b/
661 AND: "&&" | /\band\b/i | /\bAND\b/
662 NOT: "!" | /\bnot\b/i | /\bNOT\b/
663 PLUS: "+"
664 MINUS: "-"
665 STAR: "*"
666 LPAR: "("
667 RPAR: ")"
668 DOT: "."
670 %import common.CNAME
671 %import common.SIGNED_NUMBER
672 %import common.WS
673 %ignore WS
674 """
676 # Create the Lark parser with Earley algorithm (handles ambiguities better)
677 self.parser = Lark(
678 self.grammar,
679 parser="earley",
680 start="constraint",
681 )
682 # Note: transformer will be recreated in generate_builder_code() with variable_types
683 self.transformer = None
685 def _validate_parentheses_balance(self, constraint_str: str) -> None:
686 """
687 Validate that parentheses are balanced in the constraint.
689 Raises:
690 ValueError: If parentheses are unbalanced with helpful error message
691 """
692 if not constraint_str:
693 return
695 depth = 0
696 position = 0
698 for i, char in enumerate(constraint_str):
699 if char == "(":
700 depth += 1
701 elif char == ")":
702 depth -= 1
703 if depth < 0: 703 ↛ 705line 703 didn't jump to line 705 because the condition on line 703 was never true
704 # More closing than opening parens at this position
705 context_start = max(0, i - 20)
706 context_end = min(len(constraint_str), i + 20)
707 context = constraint_str[context_start:context_end]
708 pointer = " " * (i - context_start) + "^"
709 raise ValueError(
710 f"Unbalanced parentheses: found closing ')' without matching opening '(' at position {i}\n"
711 f" {context}\n"
712 f" {pointer}"
713 )
714 position = i
716 if depth > 0: 716 ↛ 719line 716 didn't jump to line 719 because the condition on line 716 was never true
717 # More opening than closing parens
718 # Find the first unmatched opening paren
719 temp_depth = 0
720 first_unmatched = -1
721 for i, char in enumerate(constraint_str):
722 if char == "(":
723 if temp_depth == 0:
724 first_unmatched = i
725 temp_depth += 1
726 elif char == ")":
727 temp_depth -= 1
728 if temp_depth == 0:
729 first_unmatched = -1
731 context_start = max(0, first_unmatched - 20)
732 context_end = min(len(constraint_str), first_unmatched + 40)
733 context = constraint_str[context_start:context_end]
734 pointer = " " * (first_unmatched - context_start) + "^"
735 raise ValueError(
736 f"Unbalanced parentheses: {depth} unclosed opening '(' found, starting at position {first_unmatched}\n"
737 f" {context}\n"
738 f" {pointer}"
739 )
741 def parse_constraint(
742 self, constraint_str: str, variable_types: Dict[str, str] = None
743 ) -> str:
744 """
745 Parse a single constraint string and return the corresponding Python code.
747 Args:
748 constraint_str: Constraint string like "x>=Date(2000,2,28)"
749 variable_types: Optional dictionary mapping variable names to types
751 Returns:
752 Python code string for the constraint
753 """
754 # Remove whitespace
755 constraint_str = constraint_str.strip()
757 # Check for unbalanced parentheses
758 self._validate_parentheses_balance(constraint_str)
760 # Check for invalid variable names that should raise ValueError
761 if self._is_invalid_variable_name(constraint_str):
762 raise ValueError(
763 f"Could not parse constraint '{constraint_str}': Invalid variable name"
764 )
765 try:
766 # Create transformer with variable types
767 transformer = ConstraintTransformer(variable_types or self.variable_types)
769 # Parse using Lark
770 tree = self.parser.parse(constraint_str)
772 # Validate that the parse tree is consistent with declared types
773 # This catches cases where grammar is ambiguous but declared types make it clear
774 if variable_types or self.variable_types:
775 self._validate_parse_tree_types(
776 tree, variable_types or self.variable_types
777 )
779 # Apply transformer
780 result = transformer.transform(tree)
781 return result
782 except Exception as e:
783 raise ValueError(f"Could not parse constraint '{constraint_str}': {e}")
785 def _is_invalid_variable_name(self, constraint_str: str) -> bool:
786 """Check if the constraint contains invalid variable names."""
787 # Handle empty or whitespace-only strings
788 if not constraint_str or constraint_str.isspace():
789 return False
791 # Property names that are valid after a dot
792 valid_property_names = {"year", "month", "day"}
794 # Check for invalid variable names like var-123, var.123, 123var
795 # But exclude cases where hyphen/dot is followed by keywords like Period/Date or '('
796 # This distinguishes "var-123" (invalid) from "z-Period(...)" (valid subtraction)
797 pattern1 = r"\b[a-zA-Z_][a-zA-Z0-9_]*[-.][a-zA-Z0-9_]+\b"
798 matches = re.finditer(pattern1, constraint_str)
799 for match in matches:
800 matched_text = match.group()
801 # Check if the part after hyphen/dot is a keyword (Period, Date) or a property name
802 parts = re.split(r"[-.]", matched_text, maxsplit=1)
803 if len(parts) == 2: 803 ↛ 799line 803 didn't jump to line 799 because the condition on line 803 was always true
804 second_part = parts[1]
805 # If it's a keyword or property name, this is valid
806 if (
807 second_part in ["Period", "Date"]
808 or second_part in valid_property_names
809 ):
810 continue
811 # Check if followed by '(' which would indicate a function call
812 end_pos = match.end()
813 if end_pos < len(constraint_str) and constraint_str[end_pos] == "(": 813 ↛ 814line 813 didn't jump to line 814 because the condition on line 813 was never true
814 continue
815 # Otherwise, it's an invalid variable name
816 return True
818 # Check for invalid variable names starting with digits like 123var
819 if re.search(r"\b[0-9]+[a-zA-Z_][a-zA-Z0-9_]*\b", constraint_str):
820 return True
821 return False
823 def extract_variable_declarations(self, constraints: List[str]) -> Dict[str, str]:
824 """
825 Extract variable declarations from constraints.
826 Looks for patterns like "x: date", "y: int", "z: bool".
828 Args:
829 constraints: List of constraint strings
831 Returns:
832 Dictionary mapping variable names to their types ('date', 'int', or 'bool')
833 """
834 declarations = {}
835 # Pattern to match variable declarations: "variable_name: type"
836 # where type is date, int, or bool
837 declaration_pattern = r"^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(date|int|bool)\s*$"
839 for constraint in constraints:
840 constraint = constraint.strip()
841 match = re.match(declaration_pattern, constraint, re.IGNORECASE)
842 if match:
843 var_name = match.group(1)
844 var_type = match.group(2).lower() # Normalize to lowercase
845 if var_type in ["date", "int", "bool"]: 845 ↛ 839line 845 didn't jump to line 839 because the condition on line 845 was always true
846 declarations[var_name] = var_type
848 return declarations
850 def filter_declarations_from_constraints(self, constraints: List[str]) -> List[str]:
851 """
852 Filter out variable declarations from the constraints list.
854 Args:
855 constraints: List of constraint strings
857 Returns:
858 Filtered list with declarations removed
859 """
860 filtered = []
861 declaration_pattern = r"^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(date|int|bool)\s*$"
863 for constraint in constraints:
864 constraint = constraint.strip()
865 if not re.match(declaration_pattern, constraint, re.IGNORECASE):
866 filtered.append(constraint)
868 return filtered
870 def extract_variables_from_constraints(self, constraints: List[str]) -> List[str]:
871 """
872 Extract all variable names from constraints.
874 Args:
875 constraints: List of constraint strings
877 Returns:
878 Sorted list of unique variable names found in constraints
879 """
880 variables = set()
882 for constraint in constraints:
883 # Remove property access patterns (var.property) to avoid
884 # extracting property names as variables
885 # Replace patterns like ".year", ".month", ".day" with empty string
886 cleaned_constraint = re.sub(r"\.(?:year|month|day)\b", "", constraint)
888 # Find all potential variable names using regex
889 # This matches CNAME pattern: [a-zA-Z_][a-zA-Z0-9_]*
890 var_matches = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", cleaned_constraint)
892 for match in var_matches:
893 # Filter out keywords and constructors
894 if match not in [
895 "Date",
896 "Period",
897 "And",
898 "Or",
899 "Not",
900 "Implies",
901 "and",
902 "or",
903 "not",
904 "True",
905 "False",
906 ]:
907 variables.add(match)
909 return sorted(list(variables))
911 def infer_variable_types_from_context(
912 self, constraints: List[str], skip_declared: Dict[str, str] = None
913 ) -> Dict[str, str]:
914 """
915 Infer variable types from their usage context in constraints using the parse tree.
917 This method parses each constraint and walks the parse tree to infer types:
918 - Variables in date_expr context → 'date'
919 - Variables in int_expr context → 'int'
920 - Variables in bool_expr context (standalone) → 'bool'
921 - Variables in Date()/Period() constructors → 'int'
923 Args:
924 constraints: List of constraint strings
925 skip_declared: Dictionary of already-declared variables to skip inference for
927 Returns:
928 Dictionary mapping inferred variable names to their types (excludes skip_declared vars)
929 """
930 inferred_types = {}
931 skip_declared = skip_declared or {}
933 for constraint in constraints:
934 try:
935 # Parse the constraint to get the tree structure
936 tree = self.parser.parse(constraint)
938 # Walk the tree to infer variable types from context
939 self._infer_types_from_tree(tree, inferred_types, skip_declared)
941 except Exception:
942 # If parsing fails, fall back to regex-based inference for Date() args
943 # Find variables inside Date() constructor arguments
944 date_pattern = (
945 r"Date\s*\(\s*([^,)]+)\s*,\s*([^,)]+)\s*,\s*([^,)]+)\s*\)"
946 )
947 date_matches = re.finditer(date_pattern, constraint)
948 for match in date_matches:
949 for arg in [match.group(1), match.group(2), match.group(3)]:
950 var_names = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", arg)
951 for var_name in var_names:
952 if var_name not in [
953 "Date",
954 "Period",
955 "And",
956 "Or",
957 "Not",
958 "Implies",
959 "and",
960 "or",
961 "not",
962 "True",
963 "False",
964 "year",
965 "month",
966 "day",
967 ]:
968 if var_name not in skip_declared:
969 property_access_pattern = rf"\b{re.escape(var_name)}\s*\.\s*(?:year|month|day)\b"
970 if not re.search(property_access_pattern, arg):
971 inferred_types[var_name] = "int"
973 return inferred_types
975 def _validate_parse_tree_types(self, tree, variable_types: Dict[str, str]) -> None:
976 """
977 Validate that the parse tree is consistent with declared variable types.
979 This is called after parsing but before transformation to reject ambiguous
980 parses that don't match declared types.
982 Args:
983 tree: Lark parse tree
984 variable_types: Dictionary of declared variable types
986 Raises:
987 ValueError: If declared types conflict with parse tree structure
988 """
989 from lark import Token, Tree
991 if isinstance(tree, Token): 991 ↛ 992line 991 didn't jump to line 992 because the condition on line 991 was never true
992 return
994 if not isinstance(tree, Tree): 994 ↛ 995line 994 didn't jump to line 995 because the condition on line 994 was never true
995 return
997 rule = tree.data
999 # Check comparisons for type consistency
1000 if rule in ["date_comparison", "int_comparison"]:
1001 # Collect all declared variables and check for mixed types or concrete type indicators
1002 declared_vars = []
1003 has_date_literal = False
1004 has_int_literal = False
1006 for child in tree.children:
1007 if isinstance(child, Tree): 1007 ↛ 1006line 1007 didn't jump to line 1006 because the condition on line 1007 was always true
1008 if child.data == "variable":
1009 var_name = str(child.children[0])
1010 if var_name in variable_types:
1011 declared_vars.append((var_name, variable_types[var_name]))
1012 elif child.data == "date_constructor":
1013 has_date_literal = True
1014 elif child.data == "int_const":
1015 has_int_literal = True
1017 # If there are concrete literals, enforce strict type matching
1018 if has_date_literal:
1019 # Date literal present - all variables must be date type
1020 for var_name, var_type in declared_vars:
1021 if var_type != "date":
1022 raise ValueError(
1023 f"Type error: Cannot compare {var_type} variable '{var_name}' with Date literal"
1024 )
1025 elif has_int_literal:
1026 # Int literal present - all variables must be int/bool type
1027 for var_name, var_type in declared_vars:
1028 if var_type not in ["int", "bool"]: 1028 ↛ 1029line 1028 didn't jump to line 1029 because the condition on line 1028 was never true
1029 raise ValueError(
1030 f"Type error: Cannot compare {var_type} variable '{var_name}' with integer literal"
1031 )
1033 # If only variables (no literals), check they're all the same type
1034 elif len(declared_vars) >= 2:
1035 first_type = declared_vars[0][1]
1036 for var_name, var_type in declared_vars[1:]:
1037 if var_type != first_type:
1038 raise ValueError(
1039 f"Type error: Cannot compare {first_type} variable '{declared_vars[0][0]}' "
1040 f"with {var_type} variable '{var_name}'"
1041 )
1043 # Recurse into children
1044 for child in tree.children:
1045 if isinstance(child, Tree): 1045 ↛ 1044line 1045 didn't jump to line 1044 because the condition on line 1045 was always true
1046 self._validate_parse_tree_types(child, variable_types)
1048 # Recurse into all children
1049 else:
1050 for child in tree.children:
1051 if isinstance(child, Tree):
1052 self._validate_parse_tree_types(child, variable_types)
1054 def _infer_types_from_tree(
1055 self, tree, inferred_types: Dict[str, str], skip_declared: Dict[str, str] = None
1056 ) -> None:
1057 """
1058 Walk the parse tree and infer variable types from their context.
1060 Args:
1061 tree: Lark parse tree node
1062 inferred_types: Dictionary to populate with inferred types
1063 skip_declared: Dictionary of already-declared variables to skip
1064 """
1065 from lark import Token, Tree
1067 skip_declared = skip_declared or {}
1069 if isinstance(tree, Token): 1069 ↛ 1070line 1069 didn't jump to line 1070 because the condition on line 1069 was never true
1070 return
1072 if not isinstance(tree, Tree): 1072 ↛ 1073line 1072 didn't jump to line 1073 because the condition on line 1072 was never true
1073 return
1075 # Check the rule name to determine context
1076 rule = tree.data
1078 # Handle date comparisons - all variables must be dates
1079 if rule == "date_comparison":
1080 for child in tree.children:
1081 if isinstance(child, Tree): 1081 ↛ 1080line 1081 didn't jump to line 1080 because the condition on line 1081 was always true
1082 if child.data == "variable":
1083 var_name = str(child.children[0])
1084 # Skip True/False as they're boolean literals, not variables
1085 if var_name in ["True", "False"]:
1086 continue
1087 # Skip already-declared variables
1088 if var_name in skip_declared:
1089 continue
1090 if var_name not in inferred_types: 1090 ↛ 1080line 1090 didn't jump to line 1080 because the condition on line 1090 was always true
1091 inferred_types[var_name] = "date"
1092 else:
1093 # Recurse to handle nested expressions
1094 self._infer_types_from_tree(
1095 child, inferred_types, skip_declared
1096 )
1097 return
1099 # Handle int comparisons - all variables must be ints (or check for bool literals)
1100 if rule == "int_comparison":
1101 # Check for boolean literals in int comparisons (e.g., x == True)
1102 has_bool_literal = any(
1103 isinstance(child, Tree)
1104 and (
1105 (
1106 child.data == "variable"
1107 and str(child.children[0]) in ["True", "False"]
1108 )
1109 or child.data == "bool_literal"
1110 )
1111 for child in tree.children
1112 )
1114 for child in tree.children:
1115 if isinstance(child, Tree): 1115 ↛ 1114line 1115 didn't jump to line 1114 because the condition on line 1115 was always true
1116 if child.data == "variable":
1117 var_name = str(child.children[0])
1118 # Skip True/False as they're boolean literals, not variables
1119 if var_name in ["True", "False"]: 1119 ↛ 1120line 1119 didn't jump to line 1120 because the condition on line 1119 was never true
1120 continue
1121 # Skip already-declared variables
1122 if var_name in skip_declared: 1122 ↛ 1124line 1122 didn't jump to line 1124 because the condition on line 1122 was always true
1123 continue
1124 if var_name not in inferred_types:
1125 # If comparing with bool literal, infer as bool
1126 if has_bool_literal:
1127 inferred_types[var_name] = "bool"
1128 else:
1129 inferred_types[var_name] = "int"
1130 else:
1131 # Recurse to handle nested expressions
1132 self._infer_types_from_tree(
1133 child, inferred_types, skip_declared
1134 )
1135 return
1137 # Variables in date expressions
1138 if rule in ["date_expr", "date_atom", "date_add_period", "date_sub_period"]:
1139 for child in tree.children:
1140 if isinstance(child, Tree) and child.data == "variable":
1141 var_name = str(child.children[0])
1142 # Skip True/False as they're boolean literals
1143 if var_name in ["True", "False"]: 1143 ↛ 1144line 1143 didn't jump to line 1144 because the condition on line 1143 was never true
1144 continue
1145 if var_name not in inferred_types:
1146 inferred_types[var_name] = "date"
1148 # Variables in integer expressions
1149 elif rule in [
1150 "int_expr",
1151 "int_term",
1152 "int_atom",
1153 "int_add",
1154 "int_sub",
1155 "int_mul",
1156 "int_neg",
1157 ]:
1158 for child in tree.children:
1159 if isinstance(child, Tree) and child.data == "variable":
1160 var_name = str(child.children[0])
1161 # Skip True/False as they're boolean literals
1162 if var_name in ["True", "False"]: 1162 ↛ 1163line 1162 didn't jump to line 1163 because the condition on line 1162 was never true
1163 continue
1164 # Only infer as int if not already inferred as date
1165 if var_name not in inferred_types:
1166 inferred_types[var_name] = "int"
1168 # Variables in Date/Period constructors are always int
1169 elif rule in ["date_constructor", "period_constructor"]:
1170 for child in tree.children:
1171 if isinstance(child, Tree):
1172 self._extract_vars_as_int(child, inferred_types)
1174 # Date field access (k.year, k.month, k.day)
1175 elif rule == "date_field_access":
1176 # Variable with property access is a date
1177 for child in tree.children:
1178 if isinstance(child, Tree) and child.data == "variable":
1179 var_name = str(child.children[0])
1180 # Skip True/False as they're boolean literals
1181 if var_name in ["True", "False"]: 1181 ↛ 1182line 1181 didn't jump to line 1182 because the condition on line 1181 was never true
1182 continue
1183 if var_name not in inferred_types:
1184 inferred_types[var_name] = "date"
1186 # Standalone boolean variables (at constraint level)
1187 elif rule == "constraint":
1188 # Check if this is just a standalone variable (bool)
1189 if len(tree.children) == 1: 1189 ↛ 1200line 1189 didn't jump to line 1200 because the condition on line 1189 was always true
1190 child = tree.children[0]
1191 if isinstance(child, Tree) and child.data == "variable": 1191 ↛ 1192line 1191 didn't jump to line 1192 because the condition on line 1191 was never true
1192 var_name = str(child.children[0])
1193 # Skip True/False as they're boolean literals
1194 if var_name in ["True", "False"]:
1195 pass # Don't infer
1196 elif var_name not in inferred_types:
1197 inferred_types[var_name] = "bool"
1199 # Recurse into children
1200 for child in tree.children:
1201 if isinstance(child, Tree):
1202 self._infer_types_from_tree(child, inferred_types, skip_declared)
1204 def _extract_vars_as_int(self, tree, inferred_types: Dict[str, str]) -> None:
1205 """Extract all variables in a subtree as 'int' type.
1207 Skip variables that are part of date_field_access (e.g., x in x.year),
1208 as those should remain as date type.
1209 """
1210 from lark import Token, Tree
1212 if isinstance(tree, Token): 1212 ↛ 1213line 1212 didn't jump to line 1213 because the condition on line 1212 was never true
1213 return
1215 if isinstance(tree, Tree): 1215 ↛ exitline 1215 didn't return from function '_extract_vars_as_int' because the condition on line 1215 was always true
1216 # Skip date_field_access nodes - the variable in x.year should remain a date
1217 if tree.data == "date_field_access": 1217 ↛ 1218line 1217 didn't jump to line 1218 because the condition on line 1217 was never true
1218 return
1220 if tree.data == "variable":
1221 var_name = str(tree.children[0])
1222 # Skip True/False as they're boolean literals
1223 if var_name not in ["True", "False"]: 1223 ↛ 1226line 1223 didn't jump to line 1226 because the condition on line 1223 was always true
1224 inferred_types[var_name] = "int"
1226 for child in tree.children:
1227 if isinstance(child, Tree):
1228 self._extract_vars_as_int(child, inferred_types)
1230 def infer_date_component_bounds(self, constraints: List[str]) -> Dict[str, str]:
1231 """
1232 Infer which Date() component position each integer expression is used in.
1234 This allows us to automatically add appropriate bounds constraints:
1235 - Position 1 (year): 1900 <= expr <= 2100
1236 - Position 2 (month): 1 <= expr <= 12
1237 - Position 3 (day): 1 <= expr <= 31
1239 For example, Date(x+2000, 1, 1) will generate:
1240 - (x + 2000) >= 1900
1241 - (x + 2000) <= 2100
1243 Args:
1244 constraints: List of constraint strings
1246 Returns:
1247 Dictionary mapping expressions to their component type ('year', 'month', 'day', or 'mixed')
1248 'mixed' means the expression is used in multiple positions
1249 """
1250 component_usage = {} # expr -> set of component types
1252 for constraint in constraints:
1253 # Find Date() constructors and their arguments
1254 date_pattern = r"Date\s*\(\s*([^,)]+)\s*,\s*([^,)]+)\s*,\s*([^,)]+)\s*\)"
1255 date_matches = re.finditer(date_pattern, constraint)
1257 for match in date_matches:
1258 args = [
1259 match.group(1).strip(),
1260 match.group(2).strip(),
1261 match.group(3).strip(),
1262 ]
1263 component_types = ["year", "month", "day"]
1265 for arg, component_type in zip(args, component_types):
1266 # Skip if this is a concrete constant (no variables)
1267 has_variables = bool(re.search(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", arg))
1268 if not has_variables:
1269 continue
1271 # Skip if this is a property access (e.g., x.year)
1272 if re.search( 1272 ↛ 1275line 1272 didn't jump to line 1275 because the condition on line 1272 was never true
1273 r"\b[a-zA-Z_][a-zA-Z0-9_]*\s*\.\s*(?:year|month|day)\b", arg
1274 ):
1275 continue
1277 # Check if the expression contains any keywords that should be excluded
1278 expr_vars = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", arg)
1279 if any( 1279 ↛ 1299line 1279 didn't jump to line 1299 because the condition on line 1279 was never true
1280 v
1281 in [
1282 "Date",
1283 "Period",
1284 "And",
1285 "Or",
1286 "Not",
1287 "Implies",
1288 "and",
1289 "or",
1290 "not",
1291 "True",
1292 "False",
1293 "year",
1294 "month",
1295 "day",
1296 ]
1297 for v in expr_vars
1298 ):
1299 continue
1301 # Store the full expression (not just variable names)
1302 if arg not in component_usage: 1302 ↛ 1304line 1302 didn't jump to line 1304 because the condition on line 1302 was always true
1303 component_usage[arg] = set()
1304 component_usage[arg].add(component_type)
1306 # Convert sets to single strings, or 'mixed' if used in multiple positions
1307 result = {}
1308 for expr, components in component_usage.items():
1309 if len(components) == 1: 1309 ↛ 1312line 1309 didn't jump to line 1312 because the condition on line 1309 was always true
1310 result[expr] = list(components)[0]
1311 else:
1312 result[expr] = "mixed"
1314 return result
1316 def _extract_and_replace_parametric_dates(
1317 self, constraints: List[str], variable_types: Dict[str, str]
1318 ) -> tuple[List[str], Dict[str, tuple[str, str, str]]]:
1319 """
1320 Extract parametric Date constructors and replace them with auxiliary variables.
1322 A parametric Date constructor has one or more variable arguments, e.g., Date(x.year, 2, 28).
1323 These need special handling because they can't be constructed directly at runtime.
1325 Args:
1326 constraints: List of constraint strings
1327 variable_types: Dictionary of declared variable types
1329 Returns:
1330 Tuple of (modified_constraints, parametric_dates_map)
1331 where parametric_dates_map is {aux_var_name: (year_expr, month_expr, day_expr)}
1332 """
1333 import re
1335 parametric_dates = {} # aux_var -> (year, month, day)
1336 aux_counter = [0] # Use list for mutability in nested function
1338 def replace_parametric_date(match):
1339 """Replace a parametric Date constructor with an auxiliary variable."""
1340 full_match = match.group(0)
1341 args = match.group(1)
1343 # Parse the three arguments
1344 # Simple comma splitting won't work due to nested expressions, so use a proper parser
1345 depth = 0
1346 current_arg = []
1347 parsed_args = []
1349 for char in args:
1350 if char == "(": 1350 ↛ 1351line 1350 didn't jump to line 1351 because the condition on line 1350 was never true
1351 depth += 1
1352 current_arg.append(char)
1353 elif char == ")": 1353 ↛ 1354line 1353 didn't jump to line 1354 because the condition on line 1353 was never true
1354 depth -= 1
1355 current_arg.append(char)
1356 elif char == "," and depth == 0:
1357 parsed_args.append("".join(current_arg).strip())
1358 current_arg = []
1359 else:
1360 current_arg.append(char)
1362 if current_arg: 1362 ↛ 1365line 1362 didn't jump to line 1365 because the condition on line 1362 was always true
1363 parsed_args.append("".join(current_arg).strip())
1365 if len(parsed_args) != 3: 1365 ↛ 1366line 1365 didn't jump to line 1366 because the condition on line 1365 was never true
1366 return full_match # Not a valid Date constructor
1368 year_expr, month_expr, day_expr = parsed_args
1370 # Check if any argument contains a variable (not just constants)
1371 has_variable = (
1372 ConstraintTransformer._has_variable(year_expr)
1373 or ConstraintTransformer._has_variable(month_expr)
1374 or ConstraintTransformer._has_variable(day_expr)
1375 )
1377 if not has_variable:
1378 # All concrete - no transformation needed
1379 return full_match
1381 # This is a parametric Date - create an auxiliary variable
1382 aux_var = f"_aux_date_{aux_counter[0]}"
1383 aux_counter[0] += 1
1385 parametric_dates[aux_var] = (year_expr, month_expr, day_expr)
1387 return aux_var
1389 # Pattern to match Date(...) constructors
1390 # This matches Date( followed by balanced parentheses
1391 date_pattern = r"Date\(([^)]+(?:\([^)]*\)[^)]*)*)\)"
1393 modified_constraints = []
1394 for constraint in constraints:
1395 # Replace all parametric Date constructors in this constraint
1396 modified = re.sub(date_pattern, replace_parametric_date, constraint)
1397 modified_constraints.append(modified)
1399 return modified_constraints, parametric_dates
1401 def generate_builder_code(
1402 self,
1403 constraints: List[str],
1404 declarations: List[str] = None,
1405 ) -> str:
1406 """
1407 Generate complete DateSATBuilder code from structured constraint data.
1409 Each constraint is a full boolean expression that can include:
1410 - Comparisons: x >= Date(2000,2,28)
1411 - Boolean operators: && (and), || (or), ! (not)
1412 - Implications: (A) -> (B)
1413 - Nested expressions: ((a || b) || c) || d
1414 - Parametric Date constructors: Date(x.year, 2, 28) in any context
1416 All constraints in the list are ANDed together.
1418 Variable declarations can be provided separately via the `declarations` parameter,
1419 or mixed in with constraints (backward compatibility). All variables used in constraints
1420 must be explicitly declared.
1422 Args:
1423 constraints: List of constraint strings (each is a full boolean expression)
1424 declarations: Optional list of variable declarations like ["x: date", "y: int"].
1425 If provided, declarations are not extracted from constraints.
1427 Returns:
1428 Complete Python code string
1430 Raises:
1431 ValueError: If any variable used in constraints is not explicitly declared
1432 """
1433 code_lines = [
1434 "from z3 import Or, And, Not, Int, Bool, Implies",
1435 "builder = DateSATBuilder()",
1436 "",
1437 ]
1439 # Extract variable declarations
1440 if declarations is not None:
1441 # New format: declarations provided separately
1442 variable_types = self.extract_variable_declarations(declarations)
1443 filtered_constraints = (
1444 constraints # No need to filter, constraints don't contain declarations
1445 )
1446 else:
1447 # Old format: declarations mixed with constraints (backward compatibility)
1448 variable_types = self.extract_variable_declarations(constraints)
1449 filtered_constraints = self.filter_declarations_from_constraints(
1450 constraints
1451 )
1453 self.variable_types = variable_types
1455 # Validate parentheses balance in all constraints FIRST (before any other validation)
1456 for constraint in filtered_constraints:
1457 self._validate_parentheses_balance(constraint)
1459 # Auto-extract variables from remaining constraints
1460 # We extract from ORIGINAL constraints (before transformation) to capture
1461 # variables inside parametric Date constructors for type inference
1462 all_variables = self.extract_variables_from_constraints(filtered_constraints)
1464 # Check for common mistakes: lowercase boolean literals (before type inference)
1465 lowercase_bools = {"true": "True", "false": "False"}
1466 bool_mistakes = [var for var in all_variables if var in lowercase_bools]
1467 if bool_mistakes:
1468 corrections = [
1469 f"'{var}' should be '{lowercase_bools[var]}'" for var in bool_mistakes
1470 ]
1471 raise ValueError(
1472 f"Invalid boolean literal(s): {', '.join(corrections)}. "
1473 f"Python uses capitalized 'True' and 'False' for boolean values."
1474 )
1476 # Infer types for undeclared variables from their usage context
1477 # Skip inference for already-declared variables to avoid conflicts
1478 inferred_types = self.infer_variable_types_from_context(
1479 filtered_constraints, skip_declared=variable_types
1480 )
1482 # Check for type conflicts between explicit declarations and inferred usage
1483 # This catches cases where a variable is declared one type but used as another
1484 type_conflicts = []
1485 for var_name, inferred_type in inferred_types.items():
1486 if var_name in variable_types:
1487 declared_type = variable_types[var_name]
1488 if declared_type != inferred_type:
1489 type_conflicts.append((var_name, declared_type, inferred_type))
1491 if type_conflicts:
1492 conflict_messages = []
1493 for var_name, declared_type, inferred_type in type_conflicts:
1494 conflict_messages.append(
1495 f"'{var_name}' is declared as '{declared_type}' but inferred as '{inferred_type}' from usage context"
1496 )
1497 raise ValueError(
1498 f"Type conflict detected:\n - " + "\n - ".join(conflict_messages)
1499 )
1501 # Merge inferred types into variable_types (explicit declarations take precedence)
1502 for var_name, var_type in inferred_types.items():
1503 if var_name not in variable_types:
1504 variable_types[var_name] = var_type
1506 # Check for undeclared variables that couldn't be inferred
1507 undeclared_vars = [var for var in all_variables if var not in variable_types]
1508 if undeclared_vars: 1508 ↛ 1511line 1508 didn't jump to line 1511 because the condition on line 1508 was never true
1509 # For undeclared variables, use inferred types
1510 # This allows the parser to continue with best-effort type inference
1511 for var in undeclared_vars:
1512 if var in inferred_types:
1513 variable_types[var] = inferred_types[var]
1515 # Check if there are still undeclared variables after inference
1516 still_undeclared = [
1517 var for var in undeclared_vars if var not in variable_types
1518 ]
1519 if still_undeclared:
1520 undeclared_str = ", ".join(sorted(still_undeclared))
1521 raise ValueError(
1522 f"Could not infer types for variables: {undeclared_str}. "
1523 f"Please explicitly declare them using 'variable_name: type' syntax "
1524 f"(where type is 'date', 'int', or 'bool')."
1525 )
1527 # Infer bounds for integer variables used in Date() constructors
1528 component_bounds = self.infer_date_component_bounds(filtered_constraints)
1530 # Add variable declarations
1531 for var_name, var_type in sorted(variable_types.items()):
1532 if var_type == "date":
1533 code_lines.append(f'{var_name} = builder.add_date_var("{var_name}")')
1534 elif var_type == "int":
1535 # Create int variables without bounds - bounds will be added as constraints
1536 code_lines.append(f'{var_name} = builder.add_int_var("{var_name}")')
1537 elif var_type == "bool": 1537 ↛ 1531line 1537 didn't jump to line 1531 because the condition on line 1537 was always true
1538 code_lines.append(f'{var_name} = builder.add_bool_var("{var_name}")')
1540 # Add natural bounds constraints for integer expressions used in Date() constructors
1541 # Only add bounds for non-constant expressions (variables or complex expressions)
1542 if component_bounds:
1543 # Filter out constant numbers - they don't need bounds
1544 non_constant_bounds = {
1545 expr: comp_type
1546 for expr, comp_type in component_bounds.items()
1547 if not expr.lstrip(
1548 "-"
1549 ).isdigit() # Skip if it's a plain number like "2020" or "-5"
1550 }
1552 if non_constant_bounds: 1552 ↛ 1597line 1552 didn't jump to line 1597 because the condition on line 1552 was always true
1553 code_lines.append("")
1554 code_lines.append("# Automatic bounds for Date() component expressions")
1555 for expr, component_type in sorted(non_constant_bounds.items()):
1556 # Wrap complex expressions in parentheses for safety
1557 if any(op in expr for op in ["+", "-", "*"]):
1558 expr_wrapped = f"({expr})"
1559 else:
1560 expr_wrapped = expr
1562 if component_type == "year":
1563 code_lines.append(
1564 f"builder.add_constraint({expr_wrapped} >= 1900)"
1565 )
1566 code_lines.append(
1567 f"builder.add_constraint({expr_wrapped} <= 2100)"
1568 )
1569 elif component_type == "month":
1570 code_lines.append(
1571 f"builder.add_constraint({expr_wrapped} >= 1)"
1572 )
1573 code_lines.append(
1574 f"builder.add_constraint({expr_wrapped} <= 12)"
1575 )
1576 elif component_type == "day": 1576 ↛ 1583line 1576 didn't jump to line 1583 because the condition on line 1576 was always true
1577 code_lines.append(
1578 f"builder.add_constraint({expr_wrapped} >= 1)"
1579 )
1580 code_lines.append(
1581 f"builder.add_constraint({expr_wrapped} <= 31)"
1582 )
1583 elif component_type == "mixed":
1584 # Expression used in multiple positions - use most restrictive bounds
1585 code_lines.append(
1586 f"# Warning: {expr} used in multiple Date() positions - using conservative bounds"
1587 )
1588 code_lines.append(
1589 f"builder.add_constraint({expr_wrapped} >= 1)"
1590 )
1591 code_lines.append(
1592 f"builder.add_constraint({expr_wrapped} <= 2100)"
1593 )
1595 # STEP: Transform parametric Date constructors AFTER type inference and bounds checking
1596 # This must happen before parsing to avoid runtime errors
1597 transformed_constraints, parametric_dates = (
1598 self._extract_and_replace_parametric_dates(
1599 filtered_constraints, variable_types
1600 )
1601 )
1603 # Add auxiliary date variables to variable_types
1604 for aux_var in parametric_dates:
1605 variable_types[aux_var] = "date"
1607 # Add variable declarations for auxiliary date variables
1608 for aux_var in sorted(parametric_dates.keys()):
1609 code_lines.append(f'{aux_var} = builder.add_date_var("{aux_var}")')
1611 # Add constraints for parametric Date constructors (auxiliary variables)
1612 if parametric_dates:
1613 code_lines.append("")
1614 code_lines.append("# Constraints for parametric Date constructors")
1615 for aux_var, (year_expr, month_expr, day_expr) in sorted(
1616 parametric_dates.items()
1617 ):
1618 # Generate component-wise constraints: aux_var.year == year_expr, etc.
1619 code_lines.append(
1620 f"builder.add_constraint({aux_var}.year == {year_expr})"
1621 )
1622 code_lines.append(
1623 f"builder.add_constraint({aux_var}.month == {month_expr})"
1624 )
1625 code_lines.append(
1626 f"builder.add_constraint({aux_var}.day == {day_expr})"
1627 )
1629 # Add constraints (using TRANSFORMED constraints)
1630 # Each constraint is a full boolean expression - parse and add directly
1631 code_lines.append("")
1632 for constraint_str in transformed_constraints:
1633 constraint_code = self.parse_constraint(constraint_str, variable_types)
1634 code_lines.append(constraint_code)
1636 return "\n".join(code_lines)
1638 def parse_constraint_data(self, constraint_data: Dict[str, Any]) -> str:
1639 """
1640 Parse constraint data and return executable code.
1642 Supports two formats:
1643 1. New format (recommended): Separate 'declarations' and 'constraints' fields
1644 {
1645 "declarations": ["x: date", "y: int"],
1646 "constraints": ["x >= Date(2000,2,28)", "(a || b) && c"]
1647 }
1649 2. Old format (backward compatible): Declarations mixed with constraints
1650 {
1651 "constraints": ["x: date", "x >= Date(2000,2,28)", "(a || b) && c"]
1652 }
1654 Each constraint is a full boolean expression that can include:
1655 - Comparisons: x >= Date(2000,2,28)
1656 - Boolean operators: && (and), || (or), ! (not)
1657 - Implications: (A) -> (B)
1658 - Nested expressions: ((a || b) || c) || d
1660 All constraints in the list are ANDed together.
1662 Args:
1663 constraint_data: Dictionary with:
1664 - 'constraints': List of constraint strings (required)
1665 - 'declarations': Optional list of variable declarations (new format)
1667 Returns:
1668 Executable Python code string
1669 """
1670 constraints = constraint_data.get("constraints", [])
1671 declarations = constraint_data.get("declarations", None)
1673 # Ensure constraints is a list of strings (no nested lists)
1674 for constraint in constraints:
1675 if isinstance(constraint, list):
1676 raise ValueError(
1677 "Nested lists are not supported. Each constraint must be a string with boolean operators."
1678 )
1680 # If declarations provided, ensure it's also a list of strings
1681 if declarations is not None:
1682 for declaration in declarations:
1683 if isinstance(declaration, list): 1683 ↛ 1684line 1683 didn't jump to line 1684 because the condition on line 1683 was never true
1684 raise ValueError(
1685 "Nested lists are not supported. Each declaration must be a string like 'x: date'."
1686 )
1687 if not isinstance(declaration, str): 1687 ↛ 1688line 1687 didn't jump to line 1688 because the condition on line 1687 was never true
1688 raise ValueError(
1689 f"Invalid declaration format: {declaration}. Expected string like 'x: date'."
1690 )
1692 return self.generate_builder_code(constraints, declarations)