Example of parsing a .dec decay file with Lark

This file demonstrates how to parse a decfile with only the Lark grammar definition. Much of the functionality demonstrated here is already provided as part of decaylanguage in a more thorough and detailed form. This example only serves to show the key data structures used.

[1]:
from __future__ import annotations

import re

from lark import Lark, Tree

from decaylanguage import data
Read in the Lark grammar definition file and the input .dec decay file
[2]:
# Read the Lark grammar located under decaylanguage's ``data.basepath``.
# The encoding is given explicitly so that the text read does not depend on
# the platform's default locale encoding.
with data.basepath.joinpath("decfile.lark").open(encoding="utf-8") as f:
    grammar = f.read()

# Read the example .dec file; the path is relative to the current working
# directory.
with open("../tests/data/test_example_Dst.dec", encoding="utf-8") as f:
    dec_file = f.read()

For illustration - the grammar Lark file:

[3]:
# Show the grammar text that was read from "decfile.lark".
print(grammar)
// Copyright (c) 2018-2026, Eduardo Rodrigues and Henry Schreiner.
//
// Distributed under the 3-clause BSD license, see accompanying file LICENSE
// or https://github.com/scikit-hep/decaylanguage for details.

start : _NEWLINE* (line _NEWLINE+)* ("End" _NEWLINE+)?
?line : define | particle_def | pythia_def | jetset_def | ls_def | model_alias | alias | chargeconj | commands | decay | cdecay | copydecay | setlspw | setlsbw | changemasslimit | inc_factor

pythia_def : LABEL_PYTHIA8_COMMANDS LABEL ":" LABEL "=" (LABEL | SIGNED_NUMBER)  // Pythia 8 commands
LABEL_PYTHIA8_COMMANDS : "PythiaAliasParam" | "PythiaBothParam" | "PythiaGenericParam"

jetset_def : "JetSetPar" LABEL "=" SIGNED_NUMBER  // Old Pythia 6 commands

ls_def : LABEL_LINESHAPE LABEL // Choose a lineshape for a particle
LABEL_LINESHAPE : "LSFLAT" | "LSNONRELBW" | "LSMANYDELTAFUNC" // Lineshape flat | non-relativistic BW, spikes

inc_factor: LABEL_INCLUDE_FACTOR LABEL BOOLEAN_INCLUDE_FACTOR // Presence of the birth/decay momentum factor and form-factor
LABEL_INCLUDE_FACTOR : "IncludeBirthFactor" | "IncludeDecayFactor"
BOOLEAN_INCLUDE_FACTOR : "yes" | "no"

setlsbw : "BlattWeisskopf" LABEL SIGNED_NUMBER // Set Blatt-Weisskopf barrier factor for a lineshape

setlspw : "SetLineshapePW" LABEL LABEL LABEL INT // Redefine Partial Wave for label -> label label

cdecay : "CDecay" LABEL

define : "Define" LABEL SIGNED_NUMBER

particle_def: "Particle" LABEL SIGNED_NUMBER SIGNED_NUMBER? // Set the mass and width (optional) of a particle (in GeV)

alias : "Alias" LABEL LABEL

chargeconj : "ChargeConj" LABEL LABEL

changemasslimit : LABEL_CHANGE_MASS LABEL SIGNED_NUMBER // Set upper/lower mass cuts on a lineshape
LABEL_CHANGE_MASS : "ChangeMassMin" | "ChangeMassMax"

?commands : global_photos

global_photos : boolean_photos

boolean_photos : "yesPhotos" -> yes
                | "noPhotos"  -> no

decay : "Decay" particle _NEWLINE+ decayline* "Enddecay"
decayline : value particle* photos? model _NEWLINE+
value : SIGNED_NUMBER
photos : "PHOTOS"

copydecay : "CopyDecay" label label

label : LABEL
particle : LABEL // Add full particle parsing here
model_label : LABEL

model_alias : "ModelAlias" model_label model

model : (model_label  | MODEL_NAME model_options?) _SEMICOLON+
model_options : (value | LABEL | _NEWLINE | _COMMA)+

// We must set priorities here to use lalr - match model name above label, and label above something else
// This is supposed to be empty and will be filled via the `edit_terminals` functionality with a list of models
MODEL_NAME.2 : "MODEL_NAME_PLACEHOLDER"/\b/


// Terminal definitions
// To use a fast parser, we need to avoid conflicts

%import common.WS_INLINE
%import common.INT
%import common.SIGNED_NUMBER

// Disregard comments, (multiple) newlines and whitespace in parser tree
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )
_SEMICOLON: /;/
_COMMA: /,/
_WS: WS_INLINE

LABEL : /[a-zA-Z0-9\/\-+*_().'~]+/
COMMENT : /[#][^\n]*/

// We should ignore comments
%ignore COMMENT

// Disregard spaces in text
%ignore WS_INLINE

For illustration - the .dec decay file:

[4]:
# Show the raw text of the .dec file that is going to be parsed.
print(dec_file)
# Example decay chain for testing purposes
# Considered by itself, this file in in fact incomplete,
# as there are no instructions on how to decay the anti-D0 and the D-!

Decay D*+
  0.677             D0 pi+       VSS;
  0.307             D+ pi0       VSS;
  0.016             D+ gamma     VSP_PWAVE;
Enddecay

Decay D*-
0.6770    anti-D0  pi-                VSS;
0.3070    D-       pi0                VSS;
0.0160    D-       gamma              VSP_PWAVE;
Enddecay

Decay D0
1.0   K-      pi+                  PHSP;
Enddecay

Decay D+
1.0   K-   pi+   pi+   pi0    PHSP;
Enddecay

Decay pi0
0.988228297   gamma   gamma                   PHSP;
0.011738247   e+      e-      gamma           PI0_DALITZ;
0.000033392   e+      e+      e-      e-      PHSP;
0.000000065   e+      e-                      PHSP;
Enddecay

Define a helper function that dynamically loads the model names needed to parse the .dec file.

[5]:
def edit_model_name_terminals(t) -> None:
    """
    Substitute the known decay-model names for the placeholder of the MODEL_NAME terminal.

    Terminals with any other name are left untouched.
    """
    if t.name != "MODEL_NAME":
        return

    known_models = ("VSS", "VSP_PWAVE", "PHSP", "PI0_DALITZ")
    # Longest names come first, so the alternation tries them before any
    # shorter name.
    ordered = sorted(known_models, key=len, reverse=True)
    alternatives = "|".join(re.escape(name) for name in ordered)
    modelstr = rf"(?:{alternatives})"
    t.pattern.value = t.pattern.value.replace("MODEL_NAME_PLACEHOLDER", modelstr)

Parse the .dec decay file.

[6]:
# Build a LALR parser from the grammar text. The ``edit_terminals`` callback
# replaces the MODEL_NAME placeholder with the actual model names.
l = Lark(grammar, parser="lalr", lexer="auto", edit_terminals=edit_model_name_terminals)
# Parse the whole .dec text; the resulting tree is queried in the cells below.
parsed_dec_file = l.parse(dec_file)
[7]:
def number_of_decays(parsed_file):
    """Count the ``decay`` subtrees found in the parsed .dec file."""
    return sum(1 for _ in parsed_file.find_data("decay"))


# Report how many "decay" subtrees were found in the parsed file.
print("# of decays in file =", number_of_decays(parsed_dec_file))
# of decays in file = 5
[8]:
def list_of_decay_trees(parsed_file):
    """Collect every ``decay`` subtree of the parsed .dec file into a list."""
    decay_trees = []
    decay_trees.extend(parsed_file.find_data("decay"))
    return decay_trees
[9]:
def get_decay_mode_details(decay_mode_Tree):
    """Extract the branching fraction, decay products and model of one decay mode.

    Only the *direct* children of the ``decayline`` tree are inspected.
    Searching the whole subtree would also pick up the ``value`` nodes nested
    inside ``model_options``, so a numeric model parameter could hide the
    branching fraction.

    Parameters
    ----------
    decay_mode_Tree
        Tree produced by the ``decayline`` grammar rule.

    Returns
    -------
    tuple
        ``(bf, products, model)`` where ``bf`` is the branching fraction as a
        float (``None`` if the line has no ``value`` child), ``products`` is a
        tuple of the decay-product names in file order, and ``model`` is the
        model name or model label (``None`` if the line has no ``model``
        child).
    """

    def subtrees(rule):
        # Tokens carry no ``data`` attribute, so only the subtrees created by
        # the requested grammar rule are kept.
        return [
            child
            for child in decay_mode_Tree.children
            if getattr(child, "data", None) == rule
        ]

    values = subtrees("value")
    bf = float(values[0].children[0].value) if values else None

    products = tuple(p.children[0].value for p in subtrees("particle"))

    model = None
    models = subtrees("model")
    if models:
        head = models[0].children[0]
        # A model given through ``model_label`` is wrapped in one more subtree
        # level, whereas a MODEL_NAME match is a bare token.
        if hasattr(head, "children"):
            head = head.children[0]
        model = head.value

    return (bf, products, model)

Finally, walk through all of the parsed Lark Tree objects and collect the information for every decay defined in the file.

[10]:
# Maps the name of each decaying particle to the list of its decay modes.
decays = {}

for tree in list_of_decay_trees(parsed_dec_file):
    # list_of_decay_trees only returns "decay" subtrees, so tree.data needs no
    # further check. The first child is the "particle" subtree, whose single
    # token holds the name of the decaying particle.
    mother = tree.children[0].children[0].value
    if mother in decays:
        print(
            f"Decays of particle {mother} are redefined! Please check your .dec file."
        )
    # A later definition replaces an earlier one for the same particle.
    decays[mother] = [
        get_decay_mode_details(decay_mode)
        for decay_mode in tree.find_data("decayline")
    ]

For illustration - print out the decay modes:

[11]:
def print_decay(dec, final_state):
    """Print the particle name followed by one aligned line per decay mode."""
    print(dec)
    for mode in final_state:
        fraction, products, model = mode[0], mode[1], mode[2]
        product_names = "  ".join(products)
        print(f"{fraction:12g} : {product_names:50s} {model:15s}")
[12]:
# Print only the decay modes stored for the pi0.
print_decay("pi0", decays["pi0"])
pi0
    0.988228 : gamma  gamma                                       PHSP
   0.0117382 : e+  e-  gamma                                      PI0_DALITZ
  3.3392e-05 : e+  e+  e-  e-                                     PHSP
     6.5e-08 : e+  e-                                             PHSP
[13]:
# Print the decay modes of every particle, in the order the entries were
# inserted into ``decays``.
for particle, decay_info in decays.items():
    print_decay(particle, decay_info)
D*+
       0.677 : D0  pi+                                            VSS
       0.307 : D+  pi0                                            VSS
       0.016 : D+  gamma                                          VSP_PWAVE
D*-
       0.677 : anti-D0  pi-                                       VSS
       0.307 : D-  pi0                                            VSS
       0.016 : D-  gamma                                          VSP_PWAVE
D0
           1 : K-  pi+                                            PHSP
D+
           1 : K-  pi+  pi+  pi0                                  PHSP
pi0
    0.988228 : gamma  gamma                                       PHSP
   0.0117382 : e+  e-  gamma                                      PI0_DALITZ
  3.3392e-05 : e+  e+  e-  e-                                     PHSP
     6.5e-08 : e+  e-                                             PHSP

For illustration - produce a dot plot of a decay Tree:

[14]:
from IPython.display import Image
from lark.tree import pydot__tree_to_png  # requires pydot

# Render the first decay tree of the file to "decay.png". rankdir="LR" is
# passed on to the renderer (presumably a left-to-right layout; confirm
# against the lark/pydot documentation).
pydot__tree_to_png(
    list_of_decay_trees(parsed_dec_file)[0], filename="decay.png", rankdir="LR"
)

# Display the image file that was just written.
Image(filename="decay.png")
[14]:
../../_images/examples_notebooks_ExampleDecFileParsingWithLark_23_0.png