First Examples¶
Here are simple examples of how to use the ServiceX client to extract data from a dataset. Each example shows the three ways to specify a request: as a YAML file, as a Python dictionary, and as a typed Python object.
Examples For Each Query Type¶
Examples for each query type (Uproot-Raw
, Func_ADL Uproot
, Python Function
)
and three different representations (Python Dictionary
, Python Typed Object
, and YAML
).
Note that all examples extract the same branch of the same tree (ROOT TTree
) from
the same dataset.
Uproot-Raw Query Example¶
This example uses the raw uproot query type to extract the AnalysisElectronsAuxDyn.pt
branch
from the CollectionTree
tree in ATLAS PHYSLITE OpenData Dataset.
from servicex import query, dataset, deliver
# The three ATLAS PHYSLITE OpenData files this request reads.
input_files = [
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",  # noqa: E501
]

# Raw uproot query: one branch of the CollectionTree tree.
branch_selection = [
    {
        "treename": "CollectionTree",
        "filter_name": "AnalysisElectronsAuxDyn.pt",
    }
]

# Request expressed as a plain Python dictionary with a single sample.
spec = {
    'Sample': [
        {
            'Name': "UprootRaw_Dict",
            'Dataset': dataset.FileList(input_files),
            'Query': query.UprootRaw(branch_selection),
        }
    ]
}

print(f"Files: {deliver(spec)}")
from servicex import Sample, ServiceXSpec, query, dataset, deliver
# The three ATLAS PHYSLITE OpenData files this request reads.
input_files = [
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",  # noqa: E501
]

# Raw uproot query: one branch of the CollectionTree tree.
branch_selection = [
    {
        "treename": "CollectionTree",
        "filter_name": "AnalysisElectronsAuxDyn.pt",
    }
]

# Same request as the dictionary form, built from the typed objects.
uproot_raw_sample = Sample(
    Name="UprootRaw_Typed",
    Dataset=dataset.FileList(input_files),
    Query=query.UprootRaw(branch_selection),
)
spec = ServiceXSpec(Sample=[uproot_raw_sample])

print(f"Files: {deliver(spec)}")
# File: config_UprootRaw.yaml
Sample:
- Name: UprootRaw_YAML
Dataset: !FileList
[
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",
]
Query: !UprootRaw |
[{"treename":"CollectionTree", "filter_name": "AnalysisElectronsAuxDyn.pt"}]
Func_ADL Uproot Query Example¶
This example uses a Func_ADL query to extract the AnalysisElectronsAuxDyn.pt
branch
from the CollectionTree
tree in ATLAS PHYSLITE OpenData Dataset.
from servicex import query, dataset, deliver
# The three ATLAS PHYSLITE OpenData files this request reads.
input_files = [
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",  # noqa: E501
]

# Request expressed as a plain Python dictionary; the Func_ADL query reads
# the electron pt branch of CollectionTree and exposes it as 'el_pt'.
spec = {
    'Sample': [
        {
            'Name': "FuncADL_Uproot_Dict",
            'Dataset': dataset.FileList(input_files),
            'Query': query.FuncADL_Uproot()
            .FromTree('CollectionTree')
            .Select(lambda e: {'el_pt': e['AnalysisElectronsAuxDyn.pt']}),  # type: ignore
        }
    ]
}

print(f"Files: {deliver(spec)}")
from servicex import Sample, ServiceXSpec, query, dataset, deliver
# The three ATLAS PHYSLITE OpenData files this request reads.
input_files = [
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",  # noqa: E501
]

# Same request as the dictionary form, built from the typed objects; the
# Func_ADL query reads the electron pt branch and exposes it as 'el_pt'.
func_adl_sample = Sample(
    Name="FuncADL_Uproot_Typed",
    Dataset=dataset.FileList(input_files),
    Query=query.FuncADL_Uproot()
    .FromTree('CollectionTree')
    .Select(lambda e: {'el_pt': e['AnalysisElectronsAuxDyn.pt']}),  # type: ignore
)
spec = ServiceXSpec(Sample=[func_adl_sample])

print(f"Files: {deliver(spec)}")
# File: config_FuncADL_Uproot.yaml
Sample:
- Name: Uproot_FuncADL_YAML
Dataset: !FileList
[
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",
]
Query: !FuncADL_Uproot |
FromTree('CollectionTree').Select(lambda e: {'el_pt': e['AnalysisElectronsAuxDyn.pt']})
Func_ADL xAOD Query Example¶
The following two examples read columns of data from an ATLAS PHYSLITE xAOD file (released by the experiment as OpenData). They use the internal C++ framework, EventLoop, to read this data. EventLoop can be used to read xAOD files in general, not just PHYSLITE.
The first example uses the very simple model that is built into ServiceX:
from servicex import query as q, deliver, dataset
def func_adl_xaod_simple():
    """Deliver jet pt and eta from one PHYSLITE OpenData file.

    Builds the query from ``q.FuncADL_ATLASr22`` and submits a
    single-sample request to the ``servicex-uc-af`` endpoint.

    Returns:
        Whatever ``deliver`` returns for the request; the result is
        indexed by the sample name ``func_adl_xAOD_simple``.

    Raises:
        AssertionError: if ``deliver`` returns ``None``.
    """
    base_query = q.FuncADL_ATLASr22()  # type: ignore

    # First pick the jet collection per event, then one column per property.
    event_jets = base_query.Select(lambda e: e.Jets('AnalysisJets'))
    jet_columns = event_jets.Select(
        lambda jets: {
            'pt': jets.Select(lambda j: j.pt()),
            'eta': jets.Select(lambda j: j.eta())
        }
    )

    input_files = [
        "root://eospublic.cern.ch//eos/opendata/atlas/rucio/mc20_13TeV/DAOD_PHYSLITE.37622528._000013.pool.root.1",  # noqa: E501
    ]
    sample = {
        'Name': "func_adl_xAOD_simple",
        'Dataset': dataset.FileList(input_files),
        'Query': jet_columns,
    }

    files = deliver({'Sample': [sample]}, servicex_name="servicex-uc-af")
    assert files is not None, "No files returned from deliver! Internal error"
    return files
if __name__ == "__main__":
    # Run the example; one input file was requested, so exactly one
    # delivered entry is expected for the sample.
    files = func_adl_xaod_simple()
    delivered_count = len(files['func_adl_xAOD_simple'])
    assert delivered_count == 1
The second example uses the full type information, allowing one to access everything that could be translated in the xAOD (including ElementLink following):
from servicex import deliver, dataset
from func_adl_servicex_xaodr22 import FuncADLQueryPHYSLITE, cpp_float
def func_adl_xaod_typed():
    """Deliver jet pt, eta and EMFrac from one PHYSLITE OpenData file.

    Builds the query from the typed ``FuncADLQueryPHYSLITE`` object and
    submits a single-sample request, with the ``atlasr22`` code generator,
    to the ``servicex-uc-af`` endpoint.

    Returns:
        Whatever ``deliver`` returns for the request; the result is
        indexed by the sample name ``func_adl_xAOD_simple``.

    Raises:
        AssertionError: if ``deliver`` returns ``None``.
    """
    physlite_events = FuncADLQueryPHYSLITE()  # type: ignore

    # First pick the jet collection per event, then one column per property.
    event_jets = physlite_events.Select(lambda e: e.Jets('AnalysisJets'))
    jet_columns = event_jets.Select(
        lambda jets: {
            'pt': jets.Select(lambda j: j.pt()),
            'eta': jets.Select(lambda j: j.eta()),
            'emf': jets.Select(lambda j: j.getAttribute[cpp_float]('EMFrac'))  # type: ignore
        }
    )

    input_files = [
        "root://eospublic.cern.ch//eos/opendata/atlas/rucio/mc20_13TeV/DAOD_PHYSLITE.37622528._000013.pool.root.1",  # noqa: E501
    ]
    sample = {
        'Name': "func_adl_xAOD_simple",
        'Dataset': dataset.FileList(input_files),
        'Query': jet_columns,
        'Codegen': 'atlasr22',
    }

    files = deliver({'Sample': [sample]}, servicex_name="servicex-uc-af")
    assert files is not None, "No files returned from deliver! Internal error"
    return files
if __name__ == "__main__":
    # Run the example; one input file was requested, so exactly one
    # delivered entry is expected for the sample.
    files = func_adl_xaod_typed()
    delivered_count = len(files['func_adl_xAOD_simple'])
    assert delivered_count == 1
For this second example, make sure the extra package func_adl_servicex_xaodr22
is installed!
Python Function Query Example¶
This example uses an uproot python function to extract the AnalysisElectronsAuxDyn.pt
branch
from the CollectionTree
tree in ATLAS PHYSLITE OpenData Dataset.
Note that you can specify a python function even in a yaml file.
from servicex import query, dataset, deliver
def run_query(input_filenames=None):
    """Read the AnalysisElectronsAuxDyn.pt branch of CollectionTree.

    Args:
        input_filenames: file to open; passed to ``uproot.open`` paired
            with the tree name. Presumably supplied by ServiceX when the
            function runs -- confirm against the ServiceX documentation.

    Returns:
        The arrays ``uproot`` reads for the requested branch.
    """
    import uproot  # type: ignore

    with uproot.open({input_filenames: "CollectionTree"}) as tree:
        electron_pt = tree.arrays("AnalysisElectronsAuxDyn.pt")
        return electron_pt
# The three ATLAS PHYSLITE OpenData files this request reads.
input_files = [
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",  # noqa: E501
]

# Request expressed as a plain Python dictionary; the query wraps the
# run_query function defined in this example.
spec = {
    'Sample': [
        {
            'Name': "PythonFunction_Dict",
            'Dataset': dataset.FileList(input_files),
            'Query': query.PythonFunction().with_uproot_function(run_query),
        }
    ]
}

print(f"Files: {deliver(spec)}")
from servicex import Sample, ServiceXSpec, query, dataset, deliver
def run_query(input_filenames=None):
    """Read the AnalysisElectronsAuxDyn.pt branch of CollectionTree.

    Args:
        input_filenames: file to open; passed to ``uproot.open`` paired
            with the tree name. Presumably supplied by ServiceX when the
            function runs -- confirm against the ServiceX documentation.

    Returns:
        The arrays ``uproot`` reads for the requested branch.
    """
    import uproot  # type: ignore

    with uproot.open({input_filenames: "CollectionTree"}) as tree:
        electron_pt = tree.arrays("AnalysisElectronsAuxDyn.pt")
        return electron_pt
# The three ATLAS PHYSLITE OpenData files this request reads.
input_files = [
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",  # noqa: E501
    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",  # noqa: E501
]

# Same request as the dictionary form, built from the typed objects; the
# query wraps the run_query function defined in this example.
python_function_sample = Sample(
    Name="PythonFunction_Typed",
    Dataset=dataset.FileList(input_files),
    Query=query.PythonFunction().with_uproot_function(run_query),
)
spec = ServiceXSpec(Sample=[python_function_sample])

print(f"Files: {deliver(spec)}")
# File: config_PythonFunction.yaml
Sample:
- Name: Uproot_PythonFunction_YAML
Dataset: !FileList
[
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",
]
Query: !PythonFunction |
def run_query(input_filenames=None):
import uproot
with uproot.open({input_filenames:"CollectionTree"}) as o:
br = o.arrays("AnalysisElectronsAuxDyn.pt")
return br
The ServiceX Deliver Function¶
The deliver
function is used to submit a request to ServiceX. It takes a request in one of the
three formats and returns a python dictionary with the name of the sample as a key
and a list of URLs or local file paths as a value.
How to Use YAML Specification¶
A YAML specification can be consumed by passing the name of the YAML file to the ServiceX deliver
function.
You can use the following code:
from servicex import deliver
# Pass the YAML file's name straight to deliver(). The name matches the
# config_FuncADL_Uproot.yaml file shown in the Func_ADL Uproot example.
print(
    deliver("config_FuncADL_Uproot.yaml")
)
The Dataset in Examples¶
The dataset in the examples is publicly accessible ATLAS Open Data (ATLAS DAOD PHYSLITE format Run 2 2016 proton-proton collision data).