Coverage for datesat / constraint_parser.py: 74.1%

701 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-10 23:47 +0000

1""" 

2Constraint parser for DateSAT system using Lark parser generator. 

3 

4This module provides functionality to parse constraints from the structured format 

5and convert them into executable DateSATBuilder code using a context-free grammar. 

6""" 

7 

8import re 

9from typing import Any, Dict, List, Union 

10 

11from lark import Lark, Transformer 

12 

13 

14class ConstraintTransformer(Transformer): 

15 """Transformer to convert Lark parse tree to Python code.""" 

16 

17 def __init__(self, variable_types: Dict[str, str] = None): 

18 """ 

19 Initialize transformer with variable type information. 

20 

21 Args: 

22 variable_types: Dictionary mapping variable names to types ('date', 'int', 'bool') 

23 """ 

24 super().__init__() 

25 self.variable_types = variable_types or {} 

26 

27 def constraint(self, items) -> str: 

28 """Transform constraint (top level).""" 

29 bool_expr = items[0] 

30 

31 # Check if bool_expr already has builder.add_constraint wrapper (from implication) 

32 if bool_expr.startswith("builder.add_constraint("): 32 ↛ 33line 32 didn't jump to line 33 because the condition on line 32 was never true

33 return bool_expr 

34 

35 return f"builder.add_constraint({bool_expr})" 

36 

37 def implication(self, items) -> str: 

38 """Transform implication A -> B into Implies(A, B).""" 

39 # Earley parser might include the -> token, filter it out 

40 filtered = [item for item in items if str(item) != "->"] 

41 if len(filtered) == 2: 41 ↛ 46line 41 didn't jump to line 46 because the condition on line 41 was always true

42 left = str(filtered[0]) 

43 right = str(filtered[1]) 

44 else: 

45 # Fallback: use first and last items 

46 left = str(items[0]) 

47 right = str(items[-1]) 

48 

49 # Strip outermost parentheses from arguments if present 

50 # e.g., (a == b) -> (c == d) becomes Implies(a == b, c == d) 

51 left = self._strip_outer_parens(left) 

52 right = self._strip_outer_parens(right) 

53 

54 return f"Implies({left}, {right})" 

55 

56 def _strip_outer_parens(self, expr: str) -> str: 

57 """Strip outermost parentheses from an expression if they wrap the entire expression.""" 

58 expr = expr.strip() 

59 if expr.startswith("(") and expr.endswith(")"): 

60 # Check if these are the outermost matching parentheses 

61 depth = 0 

62 for i, char in enumerate(expr): 

63 if char == "(": 

64 depth += 1 

65 elif char == ")": 

66 depth -= 1 

67 # If depth hits 0 before the end, these aren't the outermost parens 

68 if depth == 0 and i < len(expr) - 1: 68 ↛ 69line 68 didn't jump to line 69 because the condition on line 68 was never true

69 return expr 

70 # If we get here, the parentheses wrap the whole expression 

71 return expr[1:-1] 

72 return expr 

73 

74 def or_op(self, items) -> str: 

75 """Transform OR operation A || B into Or(A, B).""" 

76 # Earley parser might include the operator token, filter it out 

77 filtered = [item for item in items if str(item) not in ("||", "or", "OR")] 

78 if len(filtered) == 2: 78 ↛ 83line 78 didn't jump to line 83 because the condition on line 78 was always true

79 left = str(filtered[0]) 

80 right = str(filtered[1]) 

81 else: 

82 # Fallback: use first and last items 

83 left = str(items[0]) 

84 right = str(items[-1]) 

85 

86 # Strip outermost parentheses from arguments 

87 left = self._strip_outer_parens(left) 

88 right = self._strip_outer_parens(right) 

89 

90 return f"Or({left}, {right})" 

91 

92 def and_op(self, items) -> str: 

93 """Transform AND operation A && B into And(A, B).""" 

94 # Earley parser might include the operator token, filter it out 

95 filtered = [item for item in items if str(item) not in ("&&", "and", "AND")] 

96 if len(filtered) == 2: 96 ↛ 101line 96 didn't jump to line 101 because the condition on line 96 was always true

97 left = str(filtered[0]) 

98 right = str(filtered[1]) 

99 else: 

100 # Fallback: use first and last items 

101 left = str(items[0]) 

102 right = str(items[-1]) 

103 

104 # Strip outermost parentheses from arguments 

105 left = self._strip_outer_parens(left) 

106 right = self._strip_outer_parens(right) 

107 

108 return f"And({left}, {right})" 

109 

110 def not_op(self, items) -> str: 

111 """Transform NOT operation !A / not A into Not(A).""" 

112 # items = [NOT token, not_expr] or [not_expr] if NOT token is stripped 

113 filtered = [item for item in items if str(item) not in ("!", "not", "NOT")] 

114 if filtered: 114 ↛ 117line 114 didn't jump to line 117 because the condition on line 114 was always true

115 expr = str(filtered[0]) 

116 else: 

117 expr = str(items[-1]) 

118 

119 # Strip outermost parentheses from argument 

120 expr = self._strip_outer_parens(expr) 

121 

122 return f"Not({expr})" 

123 

124 def eq_bool(self, items) -> str: 

125 """Transform boolean equality A == B.""" 

126 filtered = [item for item in items if str(item) != "=="] 

127 if len(filtered) == 2: 127 ↛ 131line 127 didn't jump to line 131 because the condition on line 127 was always true

128 left = str(filtered[0]) 

129 right = str(filtered[1]) 

130 return f"{left} == {right}" 

131 return f"{items[0]} == {items[-1]}" 

132 

133 def ne_bool(self, items) -> str: 

134 """Transform boolean inequality A != B.""" 

135 filtered = [item for item in items if str(item) != "!="] 

136 if len(filtered) == 2: 136 ↛ 140line 136 didn't jump to line 140 because the condition on line 136 was always true

137 left = str(filtered[0]) 

138 right = str(filtered[1]) 

139 return f"{left} != {right}" 

140 return f"{items[0]} != {items[-1]}" 

141 

142 def bool_atom(self, items) -> str: 

143 """Transform boolean atom (comparison, variable, bool literal, or parenthesized bool expr).""" 

144 # Handle parenthesized expressions: LPAR bool_expr RPAR 

145 # Always preserve parentheses from input 

146 if len(items) == 3 and str(items[0]) == "(" and str(items[2]) == ")": 146 ↛ 148line 146 didn't jump to line 148 because the condition on line 146 was always true

147 return f"({items[1]})" 

148 if len(items) == 1: 

149 return str(items[0]) 

150 return " ".join(str(item) for item in items) 

151 

152 def date_comparison(self, items) -> str: 

153 """Transform date comparison expression.""" 

154 return self._comparison_helper(items) 

155 

156 def int_comparison(self, items) -> str: 

157 """Transform int comparison expression.""" 

158 return self._comparison_helper(items) 

159 

160 def _comparison_helper(self, items) -> str: 

161 """Helper for transforming comparison expressions (both date and int).""" 

162 left, op, right = items 

163 

164 # Convert to strings (items could be Tree objects if not fully transformed) 

165 left_str = str(left) 

166 op_str = str(op) 

167 right_str = str(right) 

168 

169 # Check if we need to transform parametric Date() comparisons 

170 # (Date constructors with variable arguments, e.g., Date(x, 2, 1)) 

171 transformed = self._transform_parametric_date_comparison( 

172 left_str, op_str, right_str 

173 ) 

174 if transformed: 

175 return transformed 

176 return f"{left_str} {op_str} {right_str}" 

177 

178 def bool_literal(self, items) -> str: 

179 """Transform boolean literal True/False.""" 

180 return str(items[0]) 

181 

182 def _extract_date_components(self, expr: str) -> List[str]: 

183 """ 

184 Extract year, month, day arguments from a Date(...) constructor expression. 

185 

186 Args: 

187 expr: An expression that may contain Date(year, month, day) 

188 

189 Returns: 

190 List of [year, month, day] argument strings, or None if not a valid Date() 

191 """ 

192 # Find Date( and match balanced parentheses 

193 date_start = expr.find("Date(") 

194 if date_start == -1: 

195 return None 

196 

197 # Find the opening paren after Date 

198 start = date_start + 5 # len("Date(") 

199 paren_count = 0 

200 args = [] 

201 current_arg = [] 

202 i = start 

203 

204 while i < len(expr): 204 ↛ 225line 204 didn't jump to line 225 because the condition on line 204 was always true

205 char = expr[i] 

206 if char == "(": 206 ↛ 207line 206 didn't jump to line 207 because the condition on line 206 was never true

207 paren_count += 1 

208 current_arg.append(char) 

209 elif char == ")": 

210 if paren_count == 0: 210 ↛ 215line 210 didn't jump to line 215 because the condition on line 210 was always true

211 # This is the closing paren for Date() 

212 if current_arg: 212 ↛ 214line 212 didn't jump to line 214 because the condition on line 212 was always true

213 args.append("".join(current_arg).strip()) 

214 break 

215 paren_count -= 1 

216 current_arg.append(char) 

217 elif char == "," and paren_count == 0: 

218 # This comma separates arguments 

219 args.append("".join(current_arg).strip()) 

220 current_arg = [] 

221 else: 

222 current_arg.append(char) 

223 i += 1 

224 

225 if len(args) == 3: 225 ↛ 227line 225 didn't jump to line 227 because the condition on line 225 was always true

226 return args 

227 return None 

228 

229 @staticmethod 

230 def _has_variable(expr: str) -> bool: 

231 """ 

232 Check if expression contains a variable (not just a literal number). 

233 

234 Used to determine if a Date() constructor is parametric. 

235 """ 

236 expr = expr.strip() 

237 # If it's just a number, it has no variable 

238 if re.match(r"^-?\d+$", expr): 

239 return False 

240 # If it contains identifier characters, it has a variable 

241 return bool(re.search(r"[a-zA-Z_][a-zA-Z0-9_]*", expr)) 

242 

243 def _transform_parametric_date_comparison( 

244 self, left: str, op: str, right: str 

245 ) -> str: 

246 """ 

247 Transform comparisons involving parametric Date() constructors. 

248 

249 A parametric Date() has one or more variable arguments, e.g., Date(x, 2, 1). 

250 This is different from a date variable (e.g., `k` where `k: date`). 

251 

252 Transforms: 

253 k >= Date(x+1, 1, 1) → component-wise comparison on k.year, k.month, k.day 

254 k == Date(x, 1, 2) → And(k.year == x, k.month == 1, k.day == 2) 

255 

256 Args: 

257 left: Left side of comparison (e.g., "k" or "Date(x, 2, 1)") 

258 op: Comparison operator (==, !=, >=, <=, >, <) 

259 right: Right side of comparison 

260 

261 Returns: 

262 Transformed constraint string, or None if no transformation needed 

263 """ 

264 import re 

265 

266 # Check if right side is a Date() constructor 

267 date_components = self._extract_date_components(right) 

268 date_var_expr = left.strip() 

269 swapped = False 

270 

271 if not date_components: 

272 # Check if left side is a Date() constructor 

273 date_components = self._extract_date_components(left) 

274 if date_components: 

275 # Swap left and right, invert operator 

276 date_var_expr = right.strip() 

277 op = self._invert_operator(op) 

278 swapped = True 

279 else: 

280 return None 

281 

282 year_expr, month_expr, day_expr = date_components 

283 

284 # If all components are concrete (no variables), no transformation needed 

285 if not ( 

286 self._has_variable(year_expr) 

287 or self._has_variable(month_expr) 

288 or self._has_variable(day_expr) 

289 ): 

290 return None 

291 

292 # Extract the date variable name (handle property access like x.year) 

293 date_var_match = re.match(r"^([a-zA-Z_][a-zA-Z0-9_]*)", date_var_expr) 

294 if not date_var_match: 294 ↛ 295line 294 didn't jump to line 295 because the condition on line 294 was never true

295 return None 

296 

297 date_var = date_var_match.group(1) 

298 

299 # Transform based on operator 

300 if op == "==": 300 ↛ 303line 300 didn't jump to line 303 because the condition on line 300 was always true

301 # Equality: all components must match 

302 return f"And({date_var}.year == {year_expr}, {date_var}.month == {month_expr}, {date_var}.day == {day_expr})" 

303 elif op == "!=": 

304 # Inequality: at least one component differs 

305 return f"Or({date_var}.year != {year_expr}, {date_var}.month != {month_expr}, {date_var}.day != {day_expr})" 

306 elif op == ">=": 

307 # Greater or equal: lexicographic comparison 

308 return f"Or({date_var}.year > {year_expr}, And({date_var}.year == {year_expr}, Or({date_var}.month > {month_expr}, And({date_var}.month == {month_expr}, {date_var}.day >= {day_expr}))))" 

309 elif op == "<=": 

310 # Less or equal: lexicographic comparison 

311 return f"Or({date_var}.year < {year_expr}, And({date_var}.year == {year_expr}, Or({date_var}.month < {month_expr}, And({date_var}.month == {month_expr}, {date_var}.day <= {day_expr}))))" 

312 elif op == ">": 

313 # Greater: lexicographic comparison (year > OR (year == AND month >) OR (year == AND month == AND day >)) 

314 return f"Or({date_var}.year > {year_expr}, And({date_var}.year == {year_expr}, Or({date_var}.month > {month_expr}, And({date_var}.month == {month_expr}, {date_var}.day > {day_expr}))))" 

315 elif op == "<": 

316 # Less: lexicographic comparison (year < OR (year == AND month <) OR (year == AND month == AND day <)) 

317 return f"Or({date_var}.year < {year_expr}, And({date_var}.year == {year_expr}, Or({date_var}.month < {month_expr}, And({date_var}.month == {month_expr}, {date_var}.day < {day_expr}))))" 

318 

319 return None 

320 

321 def _invert_operator(self, op: str) -> str: 

322 """Invert a comparison operator.""" 

323 inversions = { 

324 ">=": "<=", 

325 "<=": ">=", 

326 ">": "<", 

327 "<": ">", 

328 "==": "==", 

329 "!=": "!=", 

330 } 

331 return inversions.get(op, op) 

332 

333 # Date expression transformers 

334 def date_add_period(self, items) -> str: 

335 """Transform date + period operation.""" 

336 filtered = [item for item in items if str(item) != "+"] 

337 if len(filtered) == 2: 337 ↛ 339line 337 didn't jump to line 339 because the condition on line 337 was always true

338 return f"{filtered[0]} + {filtered[1]}" 

339 return f"{items[0]} + {items[-1]}" 

340 

341 def date_sub_period(self, items) -> str: 

342 """Transform date - period operation.""" 

343 filtered = [item for item in items if str(item) != "-"] 

344 if len(filtered) == 2: 344 ↛ 346line 344 didn't jump to line 346 because the condition on line 344 was always true

345 return f"{filtered[0]} - {filtered[1]}" 

346 return f"{items[0]} - {items[-1]}" 

347 

348 def date_atom(self, items) -> str: 

349 """Transform date atom (handle parenthesized expressions).""" 

350 # If it's just one item, return it 

351 if len(items) == 1: 351 ↛ 352line 351 didn't jump to line 352 because the condition on line 351 was never true

352 return str(items[0]) 

353 # If it's LPAR expr RPAR, preserve parentheses for nested expressions 

354 if len(items) == 3 and str(items[0]) == "(" and str(items[2]) == ")": 354 ↛ 357line 354 didn't jump to line 357 because the condition on line 354 was always true

355 return f"({items[1]})" 

356 # Otherwise return all items joined 

357 return " ".join(str(item) for item in items) 

358 

359 # Period expression transformers 

360 def period_add(self, items) -> str: 

361 """Transform period + period operation.""" 

362 filtered = [item for item in items if str(item) != "+"] 

363 if len(filtered) == 2: 

364 return f"{filtered[0]} + {filtered[1]}" 

365 return f"{items[0]} + {items[-1]}" 

366 

367 def period_sub(self, items) -> str: 

368 """Transform period - period operation.""" 

369 filtered = [item for item in items if str(item) != "-"] 

370 if len(filtered) == 2: 

371 return f"{filtered[0]} - {filtered[1]}" 

372 return f"{items[0]} - {items[-1]}" 

373 

374 def int_mul_period(self, items) -> str: 

375 """Transform int * period operation.""" 

376 filtered = [item for item in items if str(item) != "*"] 

377 if len(filtered) == 2: 

378 return f"{filtered[0]} * {filtered[1]}" 

379 return f"{items[0]} * {items[-1]}" 

380 

381 def period_mul_int(self, items) -> str: 

382 """Transform period * int operation.""" 

383 filtered = [item for item in items if str(item) != "*"] 

384 if len(filtered) == 2: 

385 return f"{filtered[0]} * {filtered[1]}" 

386 return f"{items[0]} * {items[-1]}" 

387 

388 def period_atom(self, items) -> str: 

389 """Transform period atom (handle parenthesized expressions).""" 

390 if len(items) == 1: 

391 return str(items[0]) 

392 if len(items) == 3 and str(items[0]) == "(" and str(items[2]) == ")": 

393 return f"({items[1]})" 

394 return " ".join(str(item) for item in items) 

395 

396 # Integer expression transformers 

397 def int_add(self, items) -> str: 

398 """Transform integer addition operation.""" 

399 filtered = [item for item in items if str(item) != "+"] 

400 if len(filtered) == 2: 400 ↛ 402line 400 didn't jump to line 402 because the condition on line 400 was always true

401 return f"{filtered[0]} + {filtered[1]}" 

402 return f"{items[0]} + {items[-1]}" 

403 

404 def int_sub(self, items) -> str: 

405 """Transform integer subtraction operation.""" 

406 filtered = [item for item in items if str(item) != "-"] 

407 if len(filtered) == 2: 407 ↛ 409line 407 didn't jump to line 409 because the condition on line 407 was always true

408 return f"{filtered[0]} - {filtered[1]}" 

409 return f"{items[0]} - {items[-1]}" 

410 

411 def int_mul(self, items) -> str: 

412 """Transform integer multiplication (linear only: const * var or var * const).""" 

413 filtered = [item for item in items if str(item) != "*"] 

414 if len(filtered) == 2: 414 ↛ 418line 414 didn't jump to line 418 because the condition on line 414 was always true

415 left = str(filtered[0]) 

416 right = str(filtered[1]) 

417 else: 

418 left = str(items[0]) 

419 right = str(items[-1]) 

420 

421 # Validate linearity: one side must be a constant 

422 left_is_const = self._is_constant(left) 

423 right_is_const = self._is_constant(right) 

424 

425 if not (left_is_const or right_is_const): 425 ↛ 426line 425 didn't jump to line 426 because the condition on line 425 was never true

426 raise ValueError( 

427 f"Nonlinear arithmetic not allowed: '{left} * {right}'. " 

428 f"Multiplication must be between a constant and a variable (e.g., '5 * x' or 'x * 5')." 

429 ) 

430 

431 return f"{left} * {right}" 

432 

433 def int_neg(self, items) -> str: 

434 """Transform integer negation operation.""" 

435 filtered = [item for item in items if str(item) != "-"] 

436 if filtered: 436 ↛ 438line 436 didn't jump to line 438 because the condition on line 436 was always true

437 return f"-{filtered[0]}" 

438 return f"-{items[-1]}" 

439 

440 def int_atom(self, items) -> str: 

441 """Transform int atom (handle parenthesized expressions). Always preserve parentheses.""" 

442 if len(items) == 1: 

443 return str(items[0]) 

444 if len(items) == 3 and str(items[0]) == "(" and str(items[2]) == ")": 

445 return f"({items[1]})" 

446 return " ".join(str(item) for item in items) 

447 

448 @staticmethod 

449 def _is_constant(expr: str) -> bool: 

450 """ 

451 Check if an expression is a constant (signed number). 

452 

453 Args: 

454 expr: Expression string to check 

455 

456 Returns: 

457 True if the expression is a constant number, False otherwise 

458 """ 

459 expr = expr.strip() 

460 # Remove parentheses if present 

461 if expr.startswith("(") and expr.endswith(")"): 461 ↛ 462line 461 didn't jump to line 462 because the condition on line 461 was never true

462 expr = expr[1:-1].strip() 

463 # Check if it's a signed number 

464 return bool(re.match(r"^-?\d+$", expr)) 

465 

466 # Variable transformer (unified, type checking happens at usage) 

467 def variable(self, items) -> str: 

468 """Transform variable reference.""" 

469 return str(items[0]) 

470 

471 def _check_variable_type( 

472 self, var_name: str, expected_type: str, context: str 

473 ) -> None: 

474 """ 

475 Check if a variable is used in the correct type context. 

476 

477 Args: 

478 var_name: Name of the variable 

479 expected_type: Expected type ('date', 'int', or 'bool') 

480 context: Context where the variable is used (for error messages) 

481 

482 Raises: 

483 ValueError: If variable type doesn't match expected type 

484 """ 

485 if var_name not in self.variable_types: 

486 # Variable not in type map - will be caught later by undeclared variable check 

487 return 

488 

489 actual_type = self.variable_types[var_name] 

490 if actual_type != expected_type: 

491 raise ValueError( 

492 f"Type error: Variable '{var_name}' has type '{actual_type}' but is used as '{expected_type}' " 

493 f"in context '{context}'. Variables must be used consistently with their declared type." 

494 ) 

495 

496 # Date field access (property access on dates) 

497 def date_field_access(self, items) -> str: 

498 """Transform date field access like x.year, x.month, x.day, or Date(...).year.""" 

499 # items = [date_variable/date_constructor, DOT, date_field] 

500 date_expr = items[0] 

501 field_name = items[2] 

502 return f"{date_expr}.{field_name}" 

503 

504 def date_field(self, items) -> str: 

505 """Transform date field name (year, month, day).""" 

506 return str(items[0]) 

507 

508 # Constructors 

509 def date_constructor(self, items) -> str: 

510 """Transform Date constructor (arguments are int_expr).""" 

511 # items = [LPAR, year_int_expr, month_int_expr, day_int_expr, RPAR] 

512 # Filter out parentheses and commas if present 

513 args = [str(item) for item in items if str(item) not in ["(", ")", ","]] 

514 if len(args) >= 3: 514 ↛ 517line 514 didn't jump to line 517 because the condition on line 514 was always true

515 return f"Date({args[0]}, {args[1]}, {args[2]})" 

516 # Fallback: assume items are [year, month, day] 

517 return f"Date({items[0]}, {items[1]}, {items[2]})" 

518 

519 def period_constructor(self, items) -> str: 

520 """Transform Period constructor (arguments are int_expr).""" 

521 # items = [LPAR, years_int_expr, months_int_expr, days_int_expr, RPAR] 

522 # Filter out parentheses and commas if present 

523 args = [str(item) for item in items if str(item) not in ["(", ")", ","]] 

524 if len(args) >= 3: 524 ↛ 527line 524 didn't jump to line 527 because the condition on line 524 was always true

525 return f"Period({args[0]}, {args[1]}, {args[2]})" 

526 # Fallback: assume items are [years, months, days] 

527 return f"Period({items[0]}, {items[1]}, {items[2]})" 

528 

529 def comparison_op(self, items) -> str: 

530 """Transform comparison operator.""" 

531 if not items: 531 ↛ 532line 531 didn't jump to line 532 because the condition on line 531 was never true

532 return "" 

533 return str(items[0]) 

534 

535 def int_const(self, items) -> str: 

536 """Transform integer constant.""" 

537 return str(items[0]) 

538 

539 # Legacy methods for backward compatibility 

540 def number(self, items) -> str: 

541 """Transform number literal (legacy).""" 

542 return str(items[0]) 

543 

544 def string(self, items) -> str: 

545 """Transform string literal (legacy).""" 

546 return str(items[0]) 

547 

548 

549class ConstraintParser: 

550 """Parser for structured constraint format using Lark grammar.""" 

551 

552 def __init__(self): 

553 """Initialize the parser with Lark grammar.""" 

554 self.variable_types: Dict[str, str] = ( 

555 {} 

556 ) # Maps variable name to type: 'date', 'int', or 'bool' 

557 

558 # Define the type-safe grammar for constraint parsing 

559 # Type safety is enforced by construction: int_expr, date_expr, period_expr, bool_expr 

560 self.grammar = r""" 

561 constraint: bool_expr 

562 

563 // Boolean expression hierarchy with proper precedence 

564 ?bool_expr: implication 

565  

566 ?implication: equality 

567 | equality IMPLIES implication -> implication 

568  

569 ?equality: disjunction 

570 | disjunction EQ disjunction -> eq_bool 

571 | disjunction NE disjunction -> ne_bool 

572  

573 ?disjunction: conjunction 

574 | disjunction OR conjunction -> or_op 

575  

576 ?conjunction: negation 

577 | conjunction AND negation -> and_op 

578  

579 ?negation: NOT negation -> not_op 

580 | bool_atom 

581  

582 ?bool_atom: date_comparison 

583 | int_comparison 

584 | bool_literal 

585 | variable 

586 | LPAR bool_expr RPAR 

587 

588 // Type-safe comparisons: dates compare with dates, ints compare with ints 

589 // Note: Variables can be either type, so both rules can match - Earley parser handles ambiguity 

590 // The transformer will resolve based on inferred variable types 

591 date_comparison: date_expr comparison_op date_expr 

592 int_comparison: int_expr comparison_op int_expr 

593 

594 comparison_op: LT | LTE | GT | GTE | EQ | NE 

595 LT: "<" 

596 LTE: "<=" 

597 GT: ">" 

598 GTE: ">=" 

599 EQ: "==" 

600 NE: "!=" 

601 

602 // Date expressions: dates can be combined with periods 

603 ?date_expr: date_expr PLUS period_expr -> date_add_period 

604 | date_expr MINUS period_expr -> date_sub_period 

605 | date_atom 

606  

607 ?date_atom: date_constructor 

608 | variable 

609 | LPAR date_expr RPAR 

610 

611 date_constructor: "Date" LPAR int_expr "," int_expr "," int_expr RPAR 

612 

613 // Period expressions: periods can be combined with periods or scaled by concrete integers only 

614 ?period_expr: period_expr PLUS period_expr -> period_add 

615 | period_expr MINUS period_expr -> period_sub 

616 | int_const STAR period_expr -> int_mul_period 

617 | period_expr STAR int_const -> period_mul_int 

618 | period_atom 

619  

620 ?period_atom: period_constructor 

621 | LPAR period_expr RPAR 

622 

623 period_constructor: "Period" LPAR int_const "," int_const "," int_const RPAR 

624 

625 // Integer expressions: linear arithmetic 

626 ?int_expr: int_expr PLUS int_term -> int_add 

627 | int_expr MINUS int_term -> int_sub 

628 | int_term 

629  

630 ?int_term: int_const STAR int_term -> int_mul 

631 | int_term STAR int_const -> int_mul 

632 | int_factor 

633  

634 ?int_factor: MINUS int_factor -> int_neg 

635 | int_atom 

636  

637 ?int_atom: int_const 

638 | variable 

639 | date_field_access 

640 | LPAR int_expr RPAR 

641 

642 int_const: SIGNED_NUMBER 

643 

644 // Property access on dates (extracts year, month, or day as integer) 

645 // Allows field access on any date expression, e.g., (d + Period(1, 0, 0)).year 

646 date_field_access: date_expr DOT date_field 

647  

648 date_field: DATE_FIELD 

649 DATE_FIELD: "year" | "month" | "day" 

650 

651 // Unified variable rule (type checking happens in transformer) 

652 variable: CNAME 

653  

654 // Boolean literals 

655 bool_literal: BOOL_LITERAL 

656 BOOL_LITERAL: "True" | "False" 

657 

658 // Operators 

659 IMPLIES: "->" 

660 OR: "||" | /\bor\b/i | /\bOR\b/ 

661 AND: "&&" | /\band\b/i | /\bAND\b/ 

662 NOT: "!" | /\bnot\b/i | /\bNOT\b/ 

663 PLUS: "+" 

664 MINUS: "-" 

665 STAR: "*" 

666 LPAR: "(" 

667 RPAR: ")" 

668 DOT: "." 

669 

670 %import common.CNAME 

671 %import common.SIGNED_NUMBER 

672 %import common.WS 

673 %ignore WS 

674 """ 

675 

676 # Create the Lark parser with Earley algorithm (handles ambiguities better) 

677 self.parser = Lark( 

678 self.grammar, 

679 parser="earley", 

680 start="constraint", 

681 ) 

682 # Note: transformer will be recreated in generate_builder_code() with variable_types 

683 self.transformer = None 

684 

685 def _validate_parentheses_balance(self, constraint_str: str) -> None: 

686 """ 

687 Validate that parentheses are balanced in the constraint. 

688 

689 Raises: 

690 ValueError: If parentheses are unbalanced with helpful error message 

691 """ 

692 if not constraint_str: 

693 return 

694 

695 depth = 0 

696 position = 0 

697 

698 for i, char in enumerate(constraint_str): 

699 if char == "(": 

700 depth += 1 

701 elif char == ")": 

702 depth -= 1 

703 if depth < 0: 703 ↛ 705line 703 didn't jump to line 705 because the condition on line 703 was never true

704 # More closing than opening parens at this position 

705 context_start = max(0, i - 20) 

706 context_end = min(len(constraint_str), i + 20) 

707 context = constraint_str[context_start:context_end] 

708 pointer = " " * (i - context_start) + "^" 

709 raise ValueError( 

710 f"Unbalanced parentheses: found closing ')' without matching opening '(' at position {i}\n" 

711 f" {context}\n" 

712 f" {pointer}" 

713 ) 

714 position = i 

715 

716 if depth > 0: 716 ↛ 719line 716 didn't jump to line 719 because the condition on line 716 was never true

717 # More opening than closing parens 

718 # Find the first unmatched opening paren 

719 temp_depth = 0 

720 first_unmatched = -1 

721 for i, char in enumerate(constraint_str): 

722 if char == "(": 

723 if temp_depth == 0: 

724 first_unmatched = i 

725 temp_depth += 1 

726 elif char == ")": 

727 temp_depth -= 1 

728 if temp_depth == 0: 

729 first_unmatched = -1 

730 

731 context_start = max(0, first_unmatched - 20) 

732 context_end = min(len(constraint_str), first_unmatched + 40) 

733 context = constraint_str[context_start:context_end] 

734 pointer = " " * (first_unmatched - context_start) + "^" 

735 raise ValueError( 

736 f"Unbalanced parentheses: {depth} unclosed opening '(' found, starting at position {first_unmatched}\n" 

737 f" {context}\n" 

738 f" {pointer}" 

739 ) 

740 

741 def parse_constraint( 

742 self, constraint_str: str, variable_types: Dict[str, str] = None 

743 ) -> str: 

744 """ 

745 Parse a single constraint string and return the corresponding Python code. 

746 

747 Args: 

748 constraint_str: Constraint string like "x>=Date(2000,2,28)" 

749 variable_types: Optional dictionary mapping variable names to types 

750 

751 Returns: 

752 Python code string for the constraint 

753 """ 

754 # Remove whitespace 

755 constraint_str = constraint_str.strip() 

756 

757 # Check for unbalanced parentheses 

758 self._validate_parentheses_balance(constraint_str) 

759 

760 # Check for invalid variable names that should raise ValueError 

761 if self._is_invalid_variable_name(constraint_str): 

762 raise ValueError( 

763 f"Could not parse constraint '{constraint_str}': Invalid variable name" 

764 ) 

765 try: 

766 # Create transformer with variable types 

767 transformer = ConstraintTransformer(variable_types or self.variable_types) 

768 

769 # Parse using Lark 

770 tree = self.parser.parse(constraint_str) 

771 

772 # Validate that the parse tree is consistent with declared types 

773 # This catches cases where grammar is ambiguous but declared types make it clear 

774 if variable_types or self.variable_types: 

775 self._validate_parse_tree_types( 

776 tree, variable_types or self.variable_types 

777 ) 

778 

779 # Apply transformer 

780 result = transformer.transform(tree) 

781 return result 

782 except Exception as e: 

783 raise ValueError(f"Could not parse constraint '{constraint_str}': {e}") 

784 

785 def _is_invalid_variable_name(self, constraint_str: str) -> bool: 

786 """Check if the constraint contains invalid variable names.""" 

787 # Handle empty or whitespace-only strings 

788 if not constraint_str or constraint_str.isspace(): 

789 return False 

790 

791 # Property names that are valid after a dot 

792 valid_property_names = {"year", "month", "day"} 

793 

794 # Check for invalid variable names like var-123, var.123, 123var 

795 # But exclude cases where hyphen/dot is followed by keywords like Period/Date or '(' 

796 # This distinguishes "var-123" (invalid) from "z-Period(...)" (valid subtraction) 

797 pattern1 = r"\b[a-zA-Z_][a-zA-Z0-9_]*[-.][a-zA-Z0-9_]+\b" 

798 matches = re.finditer(pattern1, constraint_str) 

799 for match in matches: 

800 matched_text = match.group() 

801 # Check if the part after hyphen/dot is a keyword (Period, Date) or a property name 

802 parts = re.split(r"[-.]", matched_text, maxsplit=1) 

803 if len(parts) == 2: 803 ↛ 799line 803 didn't jump to line 799 because the condition on line 803 was always true

804 second_part = parts[1] 

805 # If it's a keyword or property name, this is valid 

806 if ( 

807 second_part in ["Period", "Date"] 

808 or second_part in valid_property_names 

809 ): 

810 continue 

811 # Check if followed by '(' which would indicate a function call 

812 end_pos = match.end() 

813 if end_pos < len(constraint_str) and constraint_str[end_pos] == "(": 813 ↛ 814line 813 didn't jump to line 814 because the condition on line 813 was never true

814 continue 

815 # Otherwise, it's an invalid variable name 

816 return True 

817 

818 # Check for invalid variable names starting with digits like 123var 

819 if re.search(r"\b[0-9]+[a-zA-Z_][a-zA-Z0-9_]*\b", constraint_str): 

820 return True 

821 return False 

822 

823 def extract_variable_declarations(self, constraints: List[str]) -> Dict[str, str]: 

824 """ 

825 Extract variable declarations from constraints. 

826 Looks for patterns like "x: date", "y: int", "z: bool". 

827 

828 Args: 

829 constraints: List of constraint strings 

830 

831 Returns: 

832 Dictionary mapping variable names to their types ('date', 'int', or 'bool') 

833 """ 

834 declarations = {} 

835 # Pattern to match variable declarations: "variable_name: type" 

836 # where type is date, int, or bool 

837 declaration_pattern = r"^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(date|int|bool)\s*$" 

838 

839 for constraint in constraints: 

840 constraint = constraint.strip() 

841 match = re.match(declaration_pattern, constraint, re.IGNORECASE) 

842 if match: 

843 var_name = match.group(1) 

844 var_type = match.group(2).lower() # Normalize to lowercase 

845 if var_type in ["date", "int", "bool"]: 845 ↛ 839line 845 didn't jump to line 839 because the condition on line 845 was always true

846 declarations[var_name] = var_type 

847 

848 return declarations 

849 

850 def filter_declarations_from_constraints(self, constraints: List[str]) -> List[str]: 

851 """ 

852 Filter out variable declarations from the constraints list. 

853 

854 Args: 

855 constraints: List of constraint strings 

856 

857 Returns: 

858 Filtered list with declarations removed 

859 """ 

860 filtered = [] 

861 declaration_pattern = r"^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(date|int|bool)\s*$" 

862 

863 for constraint in constraints: 

864 constraint = constraint.strip() 

865 if not re.match(declaration_pattern, constraint, re.IGNORECASE): 

866 filtered.append(constraint) 

867 

868 return filtered 

869 

870 def extract_variables_from_constraints(self, constraints: List[str]) -> List[str]: 

871 """ 

872 Extract all variable names from constraints. 

873 

874 Args: 

875 constraints: List of constraint strings 

876 

877 Returns: 

878 Sorted list of unique variable names found in constraints 

879 """ 

880 variables = set() 

881 

882 for constraint in constraints: 

883 # Remove property access patterns (var.property) to avoid 

884 # extracting property names as variables 

885 # Replace patterns like ".year", ".month", ".day" with empty string 

886 cleaned_constraint = re.sub(r"\.(?:year|month|day)\b", "", constraint) 

887 

888 # Find all potential variable names using regex 

889 # This matches CNAME pattern: [a-zA-Z_][a-zA-Z0-9_]* 

890 var_matches = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", cleaned_constraint) 

891 

892 for match in var_matches: 

893 # Filter out keywords and constructors 

894 if match not in [ 

895 "Date", 

896 "Period", 

897 "And", 

898 "Or", 

899 "Not", 

900 "Implies", 

901 "and", 

902 "or", 

903 "not", 

904 "True", 

905 "False", 

906 ]: 

907 variables.add(match) 

908 

909 return sorted(list(variables)) 

910 

911 def infer_variable_types_from_context( 

912 self, constraints: List[str], skip_declared: Dict[str, str] = None 

913 ) -> Dict[str, str]: 

914 """ 

915 Infer variable types from their usage context in constraints using the parse tree. 

916 

917 This method parses each constraint and walks the parse tree to infer types: 

918 - Variables in date_expr context → 'date' 

919 - Variables in int_expr context → 'int' 

920 - Variables in bool_expr context (standalone) → 'bool' 

921 - Variables in Date()/Period() constructors → 'int' 

922 

923 Args: 

924 constraints: List of constraint strings 

925 skip_declared: Dictionary of already-declared variables to skip inference for 

926 

927 Returns: 

928 Dictionary mapping inferred variable names to their types (excludes skip_declared vars) 

929 """ 

930 inferred_types = {} 

931 skip_declared = skip_declared or {} 

932 

933 for constraint in constraints: 

934 try: 

935 # Parse the constraint to get the tree structure 

936 tree = self.parser.parse(constraint) 

937 

938 # Walk the tree to infer variable types from context 

939 self._infer_types_from_tree(tree, inferred_types, skip_declared) 

940 

941 except Exception: 

942 # If parsing fails, fall back to regex-based inference for Date() args 

943 # Find variables inside Date() constructor arguments 

944 date_pattern = ( 

945 r"Date\s*\(\s*([^,)]+)\s*,\s*([^,)]+)\s*,\s*([^,)]+)\s*\)" 

946 ) 

947 date_matches = re.finditer(date_pattern, constraint) 

948 for match in date_matches: 

949 for arg in [match.group(1), match.group(2), match.group(3)]: 

950 var_names = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", arg) 

951 for var_name in var_names: 

952 if var_name not in [ 

953 "Date", 

954 "Period", 

955 "And", 

956 "Or", 

957 "Not", 

958 "Implies", 

959 "and", 

960 "or", 

961 "not", 

962 "True", 

963 "False", 

964 "year", 

965 "month", 

966 "day", 

967 ]: 

968 if var_name not in skip_declared: 

969 property_access_pattern = rf"\b{re.escape(var_name)}\s*\.\s*(?:year|month|day)\b" 

970 if not re.search(property_access_pattern, arg): 

971 inferred_types[var_name] = "int" 

972 

973 return inferred_types 

974 

975 def _validate_parse_tree_types(self, tree, variable_types: Dict[str, str]) -> None: 

976 """ 

977 Validate that the parse tree is consistent with declared variable types. 

978 

979 This is called after parsing but before transformation to reject ambiguous 

980 parses that don't match declared types. 

981 

982 Args: 

983 tree: Lark parse tree 

984 variable_types: Dictionary of declared variable types 

985 

986 Raises: 

987 ValueError: If declared types conflict with parse tree structure 

988 """ 

989 from lark import Token, Tree 

990 

991 if isinstance(tree, Token): 991 ↛ 992line 991 didn't jump to line 992 because the condition on line 991 was never true

992 return 

993 

994 if not isinstance(tree, Tree): 994 ↛ 995line 994 didn't jump to line 995 because the condition on line 994 was never true

995 return 

996 

997 rule = tree.data 

998 

999 # Check comparisons for type consistency 

1000 if rule in ["date_comparison", "int_comparison"]: 

1001 # Collect all declared variables and check for mixed types or concrete type indicators 

1002 declared_vars = [] 

1003 has_date_literal = False 

1004 has_int_literal = False 

1005 

1006 for child in tree.children: 

1007 if isinstance(child, Tree): 1007 ↛ 1006line 1007 didn't jump to line 1006 because the condition on line 1007 was always true

1008 if child.data == "variable": 

1009 var_name = str(child.children[0]) 

1010 if var_name in variable_types: 

1011 declared_vars.append((var_name, variable_types[var_name])) 

1012 elif child.data == "date_constructor": 

1013 has_date_literal = True 

1014 elif child.data == "int_const": 

1015 has_int_literal = True 

1016 

1017 # If there are concrete literals, enforce strict type matching 

1018 if has_date_literal: 

1019 # Date literal present - all variables must be date type 

1020 for var_name, var_type in declared_vars: 

1021 if var_type != "date": 

1022 raise ValueError( 

1023 f"Type error: Cannot compare {var_type} variable '{var_name}' with Date literal" 

1024 ) 

1025 elif has_int_literal: 

1026 # Int literal present - all variables must be int/bool type 

1027 for var_name, var_type in declared_vars: 

1028 if var_type not in ["int", "bool"]: 1028 ↛ 1029line 1028 didn't jump to line 1029 because the condition on line 1028 was never true

1029 raise ValueError( 

1030 f"Type error: Cannot compare {var_type} variable '{var_name}' with integer literal" 

1031 ) 

1032 

1033 # If only variables (no literals), check they're all the same type 

1034 elif len(declared_vars) >= 2: 

1035 first_type = declared_vars[0][1] 

1036 for var_name, var_type in declared_vars[1:]: 

1037 if var_type != first_type: 

1038 raise ValueError( 

1039 f"Type error: Cannot compare {first_type} variable '{declared_vars[0][0]}' " 

1040 f"with {var_type} variable '{var_name}'" 

1041 ) 

1042 

1043 # Recurse into children 

1044 for child in tree.children: 

1045 if isinstance(child, Tree): 1045 ↛ 1044line 1045 didn't jump to line 1044 because the condition on line 1045 was always true

1046 self._validate_parse_tree_types(child, variable_types) 

1047 

1048 # Recurse into all children 

1049 else: 

1050 for child in tree.children: 

1051 if isinstance(child, Tree): 

1052 self._validate_parse_tree_types(child, variable_types) 

1053 

1054 def _infer_types_from_tree( 

1055 self, tree, inferred_types: Dict[str, str], skip_declared: Dict[str, str] = None 

1056 ) -> None: 

1057 """ 

1058 Walk the parse tree and infer variable types from their context. 

1059 

1060 Args: 

1061 tree: Lark parse tree node 

1062 inferred_types: Dictionary to populate with inferred types 

1063 skip_declared: Dictionary of already-declared variables to skip 

1064 """ 

1065 from lark import Token, Tree 

1066 

1067 skip_declared = skip_declared or {} 

1068 

1069 if isinstance(tree, Token): 1069 ↛ 1070line 1069 didn't jump to line 1070 because the condition on line 1069 was never true

1070 return 

1071 

1072 if not isinstance(tree, Tree): 1072 ↛ 1073line 1072 didn't jump to line 1073 because the condition on line 1072 was never true

1073 return 

1074 

1075 # Check the rule name to determine context 

1076 rule = tree.data 

1077 

1078 # Handle date comparisons - all variables must be dates 

1079 if rule == "date_comparison": 

1080 for child in tree.children: 

1081 if isinstance(child, Tree): 1081 ↛ 1080line 1081 didn't jump to line 1080 because the condition on line 1081 was always true

1082 if child.data == "variable": 

1083 var_name = str(child.children[0]) 

1084 # Skip True/False as they're boolean literals, not variables 

1085 if var_name in ["True", "False"]: 

1086 continue 

1087 # Skip already-declared variables 

1088 if var_name in skip_declared: 

1089 continue 

1090 if var_name not in inferred_types: 1090 ↛ 1080line 1090 didn't jump to line 1080 because the condition on line 1090 was always true

1091 inferred_types[var_name] = "date" 

1092 else: 

1093 # Recurse to handle nested expressions 

1094 self._infer_types_from_tree( 

1095 child, inferred_types, skip_declared 

1096 ) 

1097 return 

1098 

1099 # Handle int comparisons - all variables must be ints (or check for bool literals) 

1100 if rule == "int_comparison": 

1101 # Check for boolean literals in int comparisons (e.g., x == True) 

1102 has_bool_literal = any( 

1103 isinstance(child, Tree) 

1104 and ( 

1105 ( 

1106 child.data == "variable" 

1107 and str(child.children[0]) in ["True", "False"] 

1108 ) 

1109 or child.data == "bool_literal" 

1110 ) 

1111 for child in tree.children 

1112 ) 

1113 

1114 for child in tree.children: 

1115 if isinstance(child, Tree): 1115 ↛ 1114line 1115 didn't jump to line 1114 because the condition on line 1115 was always true

1116 if child.data == "variable": 

1117 var_name = str(child.children[0]) 

1118 # Skip True/False as they're boolean literals, not variables 

1119 if var_name in ["True", "False"]: 1119 ↛ 1120line 1119 didn't jump to line 1120 because the condition on line 1119 was never true

1120 continue 

1121 # Skip already-declared variables 

1122 if var_name in skip_declared: 1122 ↛ 1124line 1122 didn't jump to line 1124 because the condition on line 1122 was always true

1123 continue 

1124 if var_name not in inferred_types: 

1125 # If comparing with bool literal, infer as bool 

1126 if has_bool_literal: 

1127 inferred_types[var_name] = "bool" 

1128 else: 

1129 inferred_types[var_name] = "int" 

1130 else: 

1131 # Recurse to handle nested expressions 

1132 self._infer_types_from_tree( 

1133 child, inferred_types, skip_declared 

1134 ) 

1135 return 

1136 

1137 # Variables in date expressions 

1138 if rule in ["date_expr", "date_atom", "date_add_period", "date_sub_period"]: 

1139 for child in tree.children: 

1140 if isinstance(child, Tree) and child.data == "variable": 

1141 var_name = str(child.children[0]) 

1142 # Skip True/False as they're boolean literals 

1143 if var_name in ["True", "False"]: 1143 ↛ 1144line 1143 didn't jump to line 1144 because the condition on line 1143 was never true

1144 continue 

1145 if var_name not in inferred_types: 

1146 inferred_types[var_name] = "date" 

1147 

1148 # Variables in integer expressions 

1149 elif rule in [ 

1150 "int_expr", 

1151 "int_term", 

1152 "int_atom", 

1153 "int_add", 

1154 "int_sub", 

1155 "int_mul", 

1156 "int_neg", 

1157 ]: 

1158 for child in tree.children: 

1159 if isinstance(child, Tree) and child.data == "variable": 

1160 var_name = str(child.children[0]) 

1161 # Skip True/False as they're boolean literals 

1162 if var_name in ["True", "False"]: 1162 ↛ 1163line 1162 didn't jump to line 1163 because the condition on line 1162 was never true

1163 continue 

1164 # Only infer as int if not already inferred as date 

1165 if var_name not in inferred_types: 

1166 inferred_types[var_name] = "int" 

1167 

1168 # Variables in Date/Period constructors are always int 

1169 elif rule in ["date_constructor", "period_constructor"]: 

1170 for child in tree.children: 

1171 if isinstance(child, Tree): 

1172 self._extract_vars_as_int(child, inferred_types) 

1173 

1174 # Date field access (k.year, k.month, k.day) 

1175 elif rule == "date_field_access": 

1176 # Variable with property access is a date 

1177 for child in tree.children: 

1178 if isinstance(child, Tree) and child.data == "variable": 

1179 var_name = str(child.children[0]) 

1180 # Skip True/False as they're boolean literals 

1181 if var_name in ["True", "False"]: 1181 ↛ 1182line 1181 didn't jump to line 1182 because the condition on line 1181 was never true

1182 continue 

1183 if var_name not in inferred_types: 

1184 inferred_types[var_name] = "date" 

1185 

1186 # Standalone boolean variables (at constraint level) 

1187 elif rule == "constraint": 

1188 # Check if this is just a standalone variable (bool) 

1189 if len(tree.children) == 1: 1189 ↛ 1200line 1189 didn't jump to line 1200 because the condition on line 1189 was always true

1190 child = tree.children[0] 

1191 if isinstance(child, Tree) and child.data == "variable": 1191 ↛ 1192line 1191 didn't jump to line 1192 because the condition on line 1191 was never true

1192 var_name = str(child.children[0]) 

1193 # Skip True/False as they're boolean literals 

1194 if var_name in ["True", "False"]: 

1195 pass # Don't infer 

1196 elif var_name not in inferred_types: 

1197 inferred_types[var_name] = "bool" 

1198 

1199 # Recurse into children 

1200 for child in tree.children: 

1201 if isinstance(child, Tree): 

1202 self._infer_types_from_tree(child, inferred_types, skip_declared) 

1203 

1204 def _extract_vars_as_int(self, tree, inferred_types: Dict[str, str]) -> None: 

1205 """Extract all variables in a subtree as 'int' type. 

1206 

1207 Skip variables that are part of date_field_access (e.g., x in x.year), 

1208 as those should remain as date type. 

1209 """ 

1210 from lark import Token, Tree 

1211 

1212 if isinstance(tree, Token): 1212 ↛ 1213line 1212 didn't jump to line 1213 because the condition on line 1212 was never true

1213 return 

1214 

1215 if isinstance(tree, Tree): 1215 ↛ exitline 1215 didn't return from function '_extract_vars_as_int' because the condition on line 1215 was always true

1216 # Skip date_field_access nodes - the variable in x.year should remain a date 

1217 if tree.data == "date_field_access": 1217 ↛ 1218line 1217 didn't jump to line 1218 because the condition on line 1217 was never true

1218 return 

1219 

1220 if tree.data == "variable": 

1221 var_name = str(tree.children[0]) 

1222 # Skip True/False as they're boolean literals 

1223 if var_name not in ["True", "False"]: 1223 ↛ 1226line 1223 didn't jump to line 1226 because the condition on line 1223 was always true

1224 inferred_types[var_name] = "int" 

1225 

1226 for child in tree.children: 

1227 if isinstance(child, Tree): 

1228 self._extract_vars_as_int(child, inferred_types) 

1229 

1230 def infer_date_component_bounds(self, constraints: List[str]) -> Dict[str, str]: 

1231 """ 

1232 Infer which Date() component position each integer expression is used in. 

1233 

1234 This allows us to automatically add appropriate bounds constraints: 

1235 - Position 1 (year): 1900 <= expr <= 2100 

1236 - Position 2 (month): 1 <= expr <= 12 

1237 - Position 3 (day): 1 <= expr <= 31 

1238 

1239 For example, Date(x+2000, 1, 1) will generate: 

1240 - (x + 2000) >= 1900 

1241 - (x + 2000) <= 2100 

1242 

1243 Args: 

1244 constraints: List of constraint strings 

1245 

1246 Returns: 

1247 Dictionary mapping expressions to their component type ('year', 'month', 'day', or 'mixed') 

1248 'mixed' means the expression is used in multiple positions 

1249 """ 

1250 component_usage = {} # expr -> set of component types 

1251 

1252 for constraint in constraints: 

1253 # Find Date() constructors and their arguments 

1254 date_pattern = r"Date\s*\(\s*([^,)]+)\s*,\s*([^,)]+)\s*,\s*([^,)]+)\s*\)" 

1255 date_matches = re.finditer(date_pattern, constraint) 

1256 

1257 for match in date_matches: 

1258 args = [ 

1259 match.group(1).strip(), 

1260 match.group(2).strip(), 

1261 match.group(3).strip(), 

1262 ] 

1263 component_types = ["year", "month", "day"] 

1264 

1265 for arg, component_type in zip(args, component_types): 

1266 # Skip if this is a concrete constant (no variables) 

1267 has_variables = bool(re.search(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", arg)) 

1268 if not has_variables: 

1269 continue 

1270 

1271 # Skip if this is a property access (e.g., x.year) 

1272 if re.search( 1272 ↛ 1275line 1272 didn't jump to line 1275 because the condition on line 1272 was never true

1273 r"\b[a-zA-Z_][a-zA-Z0-9_]*\s*\.\s*(?:year|month|day)\b", arg 

1274 ): 

1275 continue 

1276 

1277 # Check if the expression contains any keywords that should be excluded 

1278 expr_vars = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", arg) 

1279 if any( 1279 ↛ 1299line 1279 didn't jump to line 1299 because the condition on line 1279 was never true

1280 v 

1281 in [ 

1282 "Date", 

1283 "Period", 

1284 "And", 

1285 "Or", 

1286 "Not", 

1287 "Implies", 

1288 "and", 

1289 "or", 

1290 "not", 

1291 "True", 

1292 "False", 

1293 "year", 

1294 "month", 

1295 "day", 

1296 ] 

1297 for v in expr_vars 

1298 ): 

1299 continue 

1300 

1301 # Store the full expression (not just variable names) 

1302 if arg not in component_usage: 1302 ↛ 1304line 1302 didn't jump to line 1304 because the condition on line 1302 was always true

1303 component_usage[arg] = set() 

1304 component_usage[arg].add(component_type) 

1305 

1306 # Convert sets to single strings, or 'mixed' if used in multiple positions 

1307 result = {} 

1308 for expr, components in component_usage.items(): 

1309 if len(components) == 1: 1309 ↛ 1312line 1309 didn't jump to line 1312 because the condition on line 1309 was always true

1310 result[expr] = list(components)[0] 

1311 else: 

1312 result[expr] = "mixed" 

1313 

1314 return result 

1315 

1316 def _extract_and_replace_parametric_dates( 

1317 self, constraints: List[str], variable_types: Dict[str, str] 

1318 ) -> tuple[List[str], Dict[str, tuple[str, str, str]]]: 

1319 """ 

1320 Extract parametric Date constructors and replace them with auxiliary variables. 

1321 

1322 A parametric Date constructor has one or more variable arguments, e.g., Date(x.year, 2, 28). 

1323 These need special handling because they can't be constructed directly at runtime. 

1324 

1325 Args: 

1326 constraints: List of constraint strings 

1327 variable_types: Dictionary of declared variable types 

1328 

1329 Returns: 

1330 Tuple of (modified_constraints, parametric_dates_map) 

1331 where parametric_dates_map is {aux_var_name: (year_expr, month_expr, day_expr)} 

1332 """ 

1333 import re 

1334 

1335 parametric_dates = {} # aux_var -> (year, month, day) 

1336 aux_counter = [0] # Use list for mutability in nested function 

1337 

1338 def replace_parametric_date(match): 

1339 """Replace a parametric Date constructor with an auxiliary variable.""" 

1340 full_match = match.group(0) 

1341 args = match.group(1) 

1342 

1343 # Parse the three arguments 

1344 # Simple comma splitting won't work due to nested expressions, so use a proper parser 

1345 depth = 0 

1346 current_arg = [] 

1347 parsed_args = [] 

1348 

1349 for char in args: 

1350 if char == "(": 1350 ↛ 1351line 1350 didn't jump to line 1351 because the condition on line 1350 was never true

1351 depth += 1 

1352 current_arg.append(char) 

1353 elif char == ")": 1353 ↛ 1354line 1353 didn't jump to line 1354 because the condition on line 1353 was never true

1354 depth -= 1 

1355 current_arg.append(char) 

1356 elif char == "," and depth == 0: 

1357 parsed_args.append("".join(current_arg).strip()) 

1358 current_arg = [] 

1359 else: 

1360 current_arg.append(char) 

1361 

1362 if current_arg: 1362 ↛ 1365line 1362 didn't jump to line 1365 because the condition on line 1362 was always true

1363 parsed_args.append("".join(current_arg).strip()) 

1364 

1365 if len(parsed_args) != 3: 1365 ↛ 1366line 1365 didn't jump to line 1366 because the condition on line 1365 was never true

1366 return full_match # Not a valid Date constructor 

1367 

1368 year_expr, month_expr, day_expr = parsed_args 

1369 

1370 # Check if any argument contains a variable (not just constants) 

1371 has_variable = ( 

1372 ConstraintTransformer._has_variable(year_expr) 

1373 or ConstraintTransformer._has_variable(month_expr) 

1374 or ConstraintTransformer._has_variable(day_expr) 

1375 ) 

1376 

1377 if not has_variable: 

1378 # All concrete - no transformation needed 

1379 return full_match 

1380 

1381 # This is a parametric Date - create an auxiliary variable 

1382 aux_var = f"_aux_date_{aux_counter[0]}" 

1383 aux_counter[0] += 1 

1384 

1385 parametric_dates[aux_var] = (year_expr, month_expr, day_expr) 

1386 

1387 return aux_var 

1388 

1389 # Pattern to match Date(...) constructors 

1390 # This matches Date( followed by balanced parentheses 

1391 date_pattern = r"Date\(([^)]+(?:\([^)]*\)[^)]*)*)\)" 

1392 

1393 modified_constraints = [] 

1394 for constraint in constraints: 

1395 # Replace all parametric Date constructors in this constraint 

1396 modified = re.sub(date_pattern, replace_parametric_date, constraint) 

1397 modified_constraints.append(modified) 

1398 

1399 return modified_constraints, parametric_dates 

1400 

1401 def generate_builder_code( 

1402 self, 

1403 constraints: List[str], 

1404 declarations: List[str] = None, 

1405 ) -> str: 

1406 """ 

1407 Generate complete DateSATBuilder code from structured constraint data. 

1408 

1409 Each constraint is a full boolean expression that can include: 

1410 - Comparisons: x >= Date(2000,2,28) 

1411 - Boolean operators: && (and), || (or), ! (not) 

1412 - Implications: (A) -> (B) 

1413 - Nested expressions: ((a || b) || c) || d 

1414 - Parametric Date constructors: Date(x.year, 2, 28) in any context 

1415 

1416 All constraints in the list are ANDed together. 

1417 

1418 Variable declarations can be provided separately via the `declarations` parameter, 

1419 or mixed in with constraints (backward compatibility). All variables used in constraints 

1420 must be explicitly declared. 

1421 

1422 Args: 

1423 constraints: List of constraint strings (each is a full boolean expression) 

1424 declarations: Optional list of variable declarations like ["x: date", "y: int"]. 

1425 If provided, declarations are not extracted from constraints. 

1426 

1427 Returns: 

1428 Complete Python code string 

1429 

1430 Raises: 

1431 ValueError: If any variable used in constraints is not explicitly declared 

1432 """ 

1433 code_lines = [ 

1434 "from z3 import Or, And, Not, Int, Bool, Implies", 

1435 "builder = DateSATBuilder()", 

1436 "", 

1437 ] 

1438 

1439 # Extract variable declarations 

1440 if declarations is not None: 

1441 # New format: declarations provided separately 

1442 variable_types = self.extract_variable_declarations(declarations) 

1443 filtered_constraints = ( 

1444 constraints # No need to filter, constraints don't contain declarations 

1445 ) 

1446 else: 

1447 # Old format: declarations mixed with constraints (backward compatibility) 

1448 variable_types = self.extract_variable_declarations(constraints) 

1449 filtered_constraints = self.filter_declarations_from_constraints( 

1450 constraints 

1451 ) 

1452 

1453 self.variable_types = variable_types 

1454 

1455 # Validate parentheses balance in all constraints FIRST (before any other validation) 

1456 for constraint in filtered_constraints: 

1457 self._validate_parentheses_balance(constraint) 

1458 

1459 # Auto-extract variables from remaining constraints 

1460 # We extract from ORIGINAL constraints (before transformation) to capture 

1461 # variables inside parametric Date constructors for type inference 

1462 all_variables = self.extract_variables_from_constraints(filtered_constraints) 

1463 

1464 # Check for common mistakes: lowercase boolean literals (before type inference) 

1465 lowercase_bools = {"true": "True", "false": "False"} 

1466 bool_mistakes = [var for var in all_variables if var in lowercase_bools] 

1467 if bool_mistakes: 

1468 corrections = [ 

1469 f"'{var}' should be '{lowercase_bools[var]}'" for var in bool_mistakes 

1470 ] 

1471 raise ValueError( 

1472 f"Invalid boolean literal(s): {', '.join(corrections)}. " 

1473 f"Python uses capitalized 'True' and 'False' for boolean values." 

1474 ) 

1475 

1476 # Infer types for undeclared variables from their usage context 

1477 # Skip inference for already-declared variables to avoid conflicts 

1478 inferred_types = self.infer_variable_types_from_context( 

1479 filtered_constraints, skip_declared=variable_types 

1480 ) 

1481 

1482 # Check for type conflicts between explicit declarations and inferred usage 

1483 # This catches cases where a variable is declared one type but used as another 

1484 type_conflicts = [] 

1485 for var_name, inferred_type in inferred_types.items(): 

1486 if var_name in variable_types: 

1487 declared_type = variable_types[var_name] 

1488 if declared_type != inferred_type: 

1489 type_conflicts.append((var_name, declared_type, inferred_type)) 

1490 

1491 if type_conflicts: 

1492 conflict_messages = [] 

1493 for var_name, declared_type, inferred_type in type_conflicts: 

1494 conflict_messages.append( 

1495 f"'{var_name}' is declared as '{declared_type}' but inferred as '{inferred_type}' from usage context" 

1496 ) 

1497 raise ValueError( 

1498 f"Type conflict detected:\n - " + "\n - ".join(conflict_messages) 

1499 ) 

1500 

1501 # Merge inferred types into variable_types (explicit declarations take precedence) 

1502 for var_name, var_type in inferred_types.items(): 

1503 if var_name not in variable_types: 

1504 variable_types[var_name] = var_type 

1505 

1506 # Check for undeclared variables that couldn't be inferred 

1507 undeclared_vars = [var for var in all_variables if var not in variable_types] 

1508 if undeclared_vars: 1508 ↛ 1511line 1508 didn't jump to line 1511 because the condition on line 1508 was never true

1509 # For undeclared variables, use inferred types 

1510 # This allows the parser to continue with best-effort type inference 

1511 for var in undeclared_vars: 

1512 if var in inferred_types: 

1513 variable_types[var] = inferred_types[var] 

1514 

1515 # Check if there are still undeclared variables after inference 

1516 still_undeclared = [ 

1517 var for var in undeclared_vars if var not in variable_types 

1518 ] 

1519 if still_undeclared: 

1520 undeclared_str = ", ".join(sorted(still_undeclared)) 

1521 raise ValueError( 

1522 f"Could not infer types for variables: {undeclared_str}. " 

1523 f"Please explicitly declare them using 'variable_name: type' syntax " 

1524 f"(where type is 'date', 'int', or 'bool')." 

1525 ) 

1526 

1527 # Infer bounds for integer variables used in Date() constructors 

1528 component_bounds = self.infer_date_component_bounds(filtered_constraints) 

1529 

1530 # Add variable declarations 

1531 for var_name, var_type in sorted(variable_types.items()): 

1532 if var_type == "date": 

1533 code_lines.append(f'{var_name} = builder.add_date_var("{var_name}")') 

1534 elif var_type == "int": 

1535 # Create int variables without bounds - bounds will be added as constraints 

1536 code_lines.append(f'{var_name} = builder.add_int_var("{var_name}")') 

1537 elif var_type == "bool": 1537 ↛ 1531line 1537 didn't jump to line 1531 because the condition on line 1537 was always true

1538 code_lines.append(f'{var_name} = builder.add_bool_var("{var_name}")') 

1539 

1540 # Add natural bounds constraints for integer expressions used in Date() constructors 

1541 # Only add bounds for non-constant expressions (variables or complex expressions) 

1542 if component_bounds: 

1543 # Filter out constant numbers - they don't need bounds 

1544 non_constant_bounds = { 

1545 expr: comp_type 

1546 for expr, comp_type in component_bounds.items() 

1547 if not expr.lstrip( 

1548 "-" 

1549 ).isdigit() # Skip if it's a plain number like "2020" or "-5" 

1550 } 

1551 

1552 if non_constant_bounds: 1552 ↛ 1597line 1552 didn't jump to line 1597 because the condition on line 1552 was always true

1553 code_lines.append("") 

1554 code_lines.append("# Automatic bounds for Date() component expressions") 

1555 for expr, component_type in sorted(non_constant_bounds.items()): 

1556 # Wrap complex expressions in parentheses for safety 

1557 if any(op in expr for op in ["+", "-", "*"]): 

1558 expr_wrapped = f"({expr})" 

1559 else: 

1560 expr_wrapped = expr 

1561 

1562 if component_type == "year": 

1563 code_lines.append( 

1564 f"builder.add_constraint({expr_wrapped} >= 1900)" 

1565 ) 

1566 code_lines.append( 

1567 f"builder.add_constraint({expr_wrapped} <= 2100)" 

1568 ) 

1569 elif component_type == "month": 

1570 code_lines.append( 

1571 f"builder.add_constraint({expr_wrapped} >= 1)" 

1572 ) 

1573 code_lines.append( 

1574 f"builder.add_constraint({expr_wrapped} <= 12)" 

1575 ) 

1576 elif component_type == "day": 1576 ↛ 1583line 1576 didn't jump to line 1583 because the condition on line 1576 was always true

1577 code_lines.append( 

1578 f"builder.add_constraint({expr_wrapped} >= 1)" 

1579 ) 

1580 code_lines.append( 

1581 f"builder.add_constraint({expr_wrapped} <= 31)" 

1582 ) 

1583 elif component_type == "mixed": 

1584 # Expression used in multiple positions - use most restrictive bounds 

1585 code_lines.append( 

1586 f"# Warning: {expr} used in multiple Date() positions - using conservative bounds" 

1587 ) 

1588 code_lines.append( 

1589 f"builder.add_constraint({expr_wrapped} >= 1)" 

1590 ) 

1591 code_lines.append( 

1592 f"builder.add_constraint({expr_wrapped} <= 2100)" 

1593 ) 

1594 

1595 # STEP: Transform parametric Date constructors AFTER type inference and bounds checking 

1596 # This must happen before parsing to avoid runtime errors 

1597 transformed_constraints, parametric_dates = ( 

1598 self._extract_and_replace_parametric_dates( 

1599 filtered_constraints, variable_types 

1600 ) 

1601 ) 

1602 

1603 # Add auxiliary date variables to variable_types 

1604 for aux_var in parametric_dates: 

1605 variable_types[aux_var] = "date" 

1606 

1607 # Add variable declarations for auxiliary date variables 

1608 for aux_var in sorted(parametric_dates.keys()): 

1609 code_lines.append(f'{aux_var} = builder.add_date_var("{aux_var}")') 

1610 

1611 # Add constraints for parametric Date constructors (auxiliary variables) 

1612 if parametric_dates: 

1613 code_lines.append("") 

1614 code_lines.append("# Constraints for parametric Date constructors") 

1615 for aux_var, (year_expr, month_expr, day_expr) in sorted( 

1616 parametric_dates.items() 

1617 ): 

1618 # Generate component-wise constraints: aux_var.year == year_expr, etc. 

1619 code_lines.append( 

1620 f"builder.add_constraint({aux_var}.year == {year_expr})" 

1621 ) 

1622 code_lines.append( 

1623 f"builder.add_constraint({aux_var}.month == {month_expr})" 

1624 ) 

1625 code_lines.append( 

1626 f"builder.add_constraint({aux_var}.day == {day_expr})" 

1627 ) 

1628 

1629 # Add constraints (using TRANSFORMED constraints) 

1630 # Each constraint is a full boolean expression - parse and add directly 

1631 code_lines.append("") 

1632 for constraint_str in transformed_constraints: 

1633 constraint_code = self.parse_constraint(constraint_str, variable_types) 

1634 code_lines.append(constraint_code) 

1635 

1636 return "\n".join(code_lines) 

1637 

1638 def parse_constraint_data(self, constraint_data: Dict[str, Any]) -> str: 

1639 """ 

1640 Parse constraint data and return executable code. 

1641 

1642 Supports two formats: 

1643 1. New format (recommended): Separate 'declarations' and 'constraints' fields 

1644 { 

1645 "declarations": ["x: date", "y: int"], 

1646 "constraints": ["x >= Date(2000,2,28)", "(a || b) && c"] 

1647 } 

1648 

1649 2. Old format (backward compatible): Declarations mixed with constraints 

1650 { 

1651 "constraints": ["x: date", "x >= Date(2000,2,28)", "(a || b) && c"] 

1652 } 

1653 

1654 Each constraint is a full boolean expression that can include: 

1655 - Comparisons: x >= Date(2000,2,28) 

1656 - Boolean operators: && (and), || (or), ! (not) 

1657 - Implications: (A) -> (B) 

1658 - Nested expressions: ((a || b) || c) || d 

1659 

1660 All constraints in the list are ANDed together. 

1661 

1662 Args: 

1663 constraint_data: Dictionary with: 

1664 - 'constraints': List of constraint strings (required) 

1665 - 'declarations': Optional list of variable declarations (new format) 

1666 

1667 Returns: 

1668 Executable Python code string 

1669 """ 

1670 constraints = constraint_data.get("constraints", []) 

1671 declarations = constraint_data.get("declarations", None) 

1672 

1673 # Ensure constraints is a list of strings (no nested lists) 

1674 for constraint in constraints: 

1675 if isinstance(constraint, list): 

1676 raise ValueError( 

1677 "Nested lists are not supported. Each constraint must be a string with boolean operators." 

1678 ) 

1679 

1680 # If declarations provided, ensure it's also a list of strings 

1681 if declarations is not None: 

1682 for declaration in declarations: 

1683 if isinstance(declaration, list): 1683 ↛ 1684line 1683 didn't jump to line 1684 because the condition on line 1683 was never true

1684 raise ValueError( 

1685 "Nested lists are not supported. Each declaration must be a string like 'x: date'." 

1686 ) 

1687 if not isinstance(declaration, str): 1687 ↛ 1688line 1687 didn't jump to line 1688 because the condition on line 1687 was never true

1688 raise ValueError( 

1689 f"Invalid declaration format: {declaration}. Expected string like 'x: date'." 

1690 ) 

1691 

1692 return self.generate_builder_code(constraints, declarations)