Example of parsing a .dec decay file with Lark
This file demonstrates how to parse a decfile with only the Lark grammar definition. Much of the functionality demonstrated here is already provided as part of decaylanguage in a more thorough and detailed form. This example only serves to show the key data structures used.
[1]:
from __future__ import annotations
import re
from lark import Lark, Tree
from decaylanguage import data
[2]:
# Read the Lark grammar definition found under the decaylanguage data path.
with data.basepath.joinpath("decfile.lark").open() as grammar_file:
    grammar = grammar_file.read()

# Read the example .dec decay file that is parsed further below.
with open("../tests/data/test_example_Dst.dec") as decfile_handle:
    dec_file = decfile_handle.read()
For illustration - the grammar Lark file:
[3]:
# Show the raw text of the Lark grammar that was just loaded.
print(grammar)
// Copyright (c) 2018-2026, Eduardo Rodrigues and Henry Schreiner.
//
// Distributed under the 3-clause BSD license, see accompanying file LICENSE
// or https://github.com/scikit-hep/decaylanguage for details.
start : _NEWLINE* (line _NEWLINE+)* ("End" _NEWLINE+)?
?line : define | particle_def | pythia_def | jetset_def | ls_def | model_alias | alias | chargeconj | commands | decay | cdecay | copydecay | setlspw | setlsbw | changemasslimit | inc_factor
pythia_def : LABEL_PYTHIA8_COMMANDS LABEL ":" LABEL "=" (LABEL | SIGNED_NUMBER) // Pythia 8 commands
LABEL_PYTHIA8_COMMANDS : "PythiaAliasParam" | "PythiaBothParam" | "PythiaGenericParam"
jetset_def : "JetSetPar" LABEL "=" SIGNED_NUMBER // Old Pythia 6 commands
ls_def : LABEL_LINESHAPE LABEL // Choose a lineshape for a particle
LABEL_LINESHAPE : "LSFLAT" | "LSNONRELBW" | "LSMANYDELTAFUNC" // Lineshape flat | non-relativistic BW, spikes
inc_factor: LABEL_INCLUDE_FACTOR LABEL BOOLEAN_INCLUDE_FACTOR // Presence of the birth/decay momentum factor and form-factor
LABEL_INCLUDE_FACTOR : "IncludeBirthFactor" | "IncludeDecayFactor"
BOOLEAN_INCLUDE_FACTOR : "yes" | "no"
setlsbw : "BlattWeisskopf" LABEL SIGNED_NUMBER // Set Blatt-Weisskopf barrier factor for a lineshape
setlspw : "SetLineshapePW" LABEL LABEL LABEL INT // Redefine Partial Wave for label -> label label
cdecay : "CDecay" LABEL
define : "Define" LABEL SIGNED_NUMBER
particle_def: "Particle" LABEL SIGNED_NUMBER SIGNED_NUMBER? // Set the mass and width (optional) of a particle (in GeV)
alias : "Alias" LABEL LABEL
chargeconj : "ChargeConj" LABEL LABEL
changemasslimit : LABEL_CHANGE_MASS LABEL SIGNED_NUMBER // Set upper/lower mass cuts on a lineshape
LABEL_CHANGE_MASS : "ChangeMassMin" | "ChangeMassMax"
?commands : global_photos
global_photos : boolean_photos
boolean_photos : "yesPhotos" -> yes
| "noPhotos" -> no
decay : "Decay" particle _NEWLINE+ decayline* "Enddecay"
decayline : value particle* photos? model _NEWLINE+
value : SIGNED_NUMBER
photos : "PHOTOS"
copydecay : "CopyDecay" label label
label : LABEL
particle : LABEL // Add full particle parsing here
model_label : LABEL
model_alias : "ModelAlias" model_label model
model : (model_label | MODEL_NAME model_options?) _SEMICOLON+
model_options : (value | LABEL | _NEWLINE | _COMMA)+
// We must set priorities here to use lalr - match model name above label, and label above something else
// This is supposed to be empty and will be filled via the `edit_terminals` functionality with a list of models
MODEL_NAME.2 : "MODEL_NAME_PLACEHOLDER"/\b/
// Terminal definitions
// To use a fast parser, we need to avoid conflicts
%import common.WS_INLINE
%import common.INT
%import common.SIGNED_NUMBER
// Disregard comments, (multiple) newlines and whitespace in parser tree
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )
_SEMICOLON: /;/
_COMMA: /,/
_WS: WS_INLINE
LABEL : /[a-zA-Z0-9\/\-+*_().'~]+/
COMMENT : /[#][^\n]*/
// We should ignore comments
%ignore COMMENT
// Disregard spaces in text
%ignore WS_INLINE
For illustration - the .dec decay file:
[4]:
# Show the raw text of the .dec decay file that was just loaded.
print(dec_file)
# Example decay chain for testing purposes
# Considered by itself, this file in in fact incomplete,
# as there are no instructions on how to decay the anti-D0 and the D-!
Decay D*+
0.677 D0 pi+ VSS;
0.307 D+ pi0 VSS;
0.016 D+ gamma VSP_PWAVE;
Enddecay
Decay D*-
0.6770 anti-D0 pi- VSS;
0.3070 D- pi0 VSS;
0.0160 D- gamma VSP_PWAVE;
Enddecay
Decay D0
1.0 K- pi+ PHSP;
Enddecay
Decay D+
1.0 K- pi+ pi+ pi0 PHSP;
Enddecay
Decay pi0
0.988228297 gamma gamma PHSP;
0.011738247 e+ e- gamma PI0_DALITZ;
0.000033392 e+ e+ e- e- PHSP;
0.000000065 e+ e- PHSP;
Enddecay
Define a helper function to dynamically load the model names needed to parse the decfile
[5]:
def edit_model_name_terminals(t) -> None:
    """
    Swap the model-name placeholder of the ``MODEL_NAME`` terminal for a real regex.

    Terminals with any other name are left untouched. For ``MODEL_NAME``, the
    text ``MODEL_NAME_PLACEHOLDER`` inside ``t.pattern.value`` is replaced by a
    non-capturing alternation of the (regex-escaped) decay model names.
    """
    if t.name != "MODEL_NAME":
        return

    known_models = ("VSS", "VSP_PWAVE", "PHSP", "PI0_DALITZ")
    # Longest names come first in the alternation - presumably so that a name
    # which is a prefix of a longer one cannot win the match.
    longest_first = sorted(known_models, key=len, reverse=True)
    alternation = "|".join(map(re.escape, longest_first))
    modelstr = rf"(?:{alternation})"
    t.pattern.value = t.pattern.value.replace("MODEL_NAME_PLACEHOLDER", modelstr)
Parse the .dec decay file.
[6]:
# Build a LALR parser from the grammar; the `edit_terminals` callback fills in
# the model names before the terminals are compiled.
l = Lark(
    grammar,
    parser="lalr",
    lexer="auto",
    edit_terminals=edit_model_name_terminals,
)
# Parse the full text of the .dec file into a Lark tree.
parsed_dec_file = l.parse(dec_file)
[7]:
def number_of_decays(parsed_file):
    """Count the ``decay`` subtrees present in the parsed .dec file."""
    return sum(1 for _ in parsed_file.find_data("decay"))
# Report how many `decay` blocks the parsed file contains.
print("# of decays in file =", number_of_decays(parsed_dec_file))
# of decays in file = 5
[8]:
def list_of_decay_trees(parsed_file):
    """Collect every ``decay`` subtree of the parsed .dec file into a list."""
    return [*parsed_file.find_data("decay")]
[9]:
def get_decay_mode_details(decay_mode_Tree):
    """
    Extract the branching fraction, decay products and model of one decay mode.

    Only the *direct* children of the ``decayline`` tree are inspected. A
    recursive search would also pick up the ``value`` subtrees that the grammar
    allows inside ``model_options``, and would then mistake model parameters
    for additional branching fractions.

    Parameters
    ----------
    decay_mode_Tree
        A ``decayline`` tree, i.e. ``value particle* photos? model``.

    Returns
    -------
    tuple
        ``(bf, products, model)`` where ``bf`` is a float (``None`` if the line
        has no ``value`` child), ``products`` is a tuple of particle names and
        ``model`` is the model name or model alias (``None`` if the line has no
        ``model`` child).
    """

    def first_child(node, rule):
        """Return the first direct subtree of ``node`` named ``rule``, or None."""
        # Tokens have no ``data`` attribute, so getattr() skips them.
        return next(
            (c for c in node.children if getattr(c, "data", None) == rule), None
        )

    value_tree = first_child(decay_mode_Tree, "value")
    # Convert only when a branching fraction is actually present.
    bf = None if value_tree is None else float(value_tree.children[0].value)

    products = tuple(
        child.children[0].value
        for child in decay_mode_Tree.children
        if getattr(child, "data", None) == "particle"
    )

    model = None
    model_tree = first_child(decay_mode_Tree, "model")
    if model_tree is not None:
        head = model_tree.children[0]
        # The grammar allows `model_label` in place of a MODEL_NAME token; in
        # that case the name sits one level deeper.
        if getattr(head, "data", None) == "model_label":
            head = head.children[0]
        model = head.value

    return (bf, products, model)
Finally, walk through all the parsed Lark Tree objects and collect the information on every defined decay.
[10]:
# Map each decaying particle name to the list of its decay-mode details.
decays = {}
for tree in list_of_decay_trees(parsed_dec_file):
    if tree.data != "decay":
        continue
    # The first child of a `decay` tree is the `particle` subtree; its single
    # token holds the name of the decaying particle.
    mother = tree.children[0].children[0].value
    if mother in decays:
        print(
            f"Decays of particle {mother} are redefined! Please check your .dec file."
        )
    # A redefinition starts from an empty list, replacing the earlier modes.
    modes = decays[mother] = []
    for decay_mode in tree.find_data("decayline"):
        modes.append(get_decay_mode_details(decay_mode))
For illustration - print out the decay modes:
[11]:
def print_decay(dec, final_state):
    """
    Print the particle name followed by one formatted line per decay mode.

    Each entry of ``final_state`` is indexed as ``[0]`` branching fraction,
    ``[1]`` iterable of decay product names and ``[2]`` model name.
    """
    print(dec)
    for mode in final_state:
        daughters = " ".join(mode[1])
        print(f"{mode[0]:12g} : {daughters:50s} {mode[2]:15s}")
[12]:
# Pretty-print the decay modes collected for the pi0.
print_decay("pi0", decays["pi0"])
pi0
0.988228 : gamma gamma PHSP
0.0117382 : e+ e- gamma PI0_DALITZ
3.3392e-05 : e+ e+ e- e- PHSP
6.5e-08 : e+ e- PHSP
[13]:
# Pretty-print the decay modes of every particle collected in `decays`.
for mother_name, mode_list in decays.items():
    print_decay(mother_name, mode_list)
D*+
0.677 : D0 pi+ VSS
0.307 : D+ pi0 VSS
0.016 : D+ gamma VSP_PWAVE
D*-
0.677 : anti-D0 pi- VSS
0.307 : D- pi0 VSS
0.016 : D- gamma VSP_PWAVE
D0
1 : K- pi+ PHSP
D+
1 : K- pi+ pi+ pi0 PHSP
pi0
0.988228 : gamma gamma PHSP
0.0117382 : e+ e- gamma PI0_DALITZ
3.3392e-05 : e+ e+ e- e- PHSP
6.5e-08 : e+ e- PHSP
For illustration - produce a dot plot of a decay Tree:
[14]:
from IPython.display import Image
from lark.tree import pydot__tree_to_png # requires pydot
# Render the first decay tree as a left-to-right graph and save it as a PNG.
first_decay_tree = list_of_decay_trees(parsed_dec_file)[0]
pydot__tree_to_png(first_decay_tree, filename="decay.png", rankdir="LR")
# Keep this as the last expression so the notebook displays the image.
Image(filename="decay.png")
[14]: