Implementing a grid search sampler from scratch
In this example, we will implement a grid search sampler from scratch. The grid search sampler is a simple sampler that evaluates all possible combinations of the parameters in the domain. This is useful for small domains, but it can become computationally expensive for larger domains. We will show how to create this sampler and use it in a f3dasm data-driven experiment.
from __future__ import annotations
from itertools import product
from typing import Optional
import numpy as np
import pandas as pd
from f3dasm import Block, ExperimentData
from f3dasm.design import Domain
When integrating your sampling strategy into the data-driven process, you have to create a new class that inherits from the `Block` base class.
class GridSampler(Block):
    """Sampler block that enumerates every combination of parameter values.

    ``arm`` collects, per input parameter, the sequence of values to visit;
    ``call`` takes the Cartesian product of those sequences and returns it
    as the input data of a new ``ExperimentData``.

    Parameters
    ----------
    stepsize_continuous_parameters
        Mapping from the name of each continuous parameter to the step size
        used to discretise it. May be left as ``None`` only when the domain
        contains no continuous parameters.
    """

    def __init__(
        self, stepsize_continuous_parameters: Optional[dict[str, float]] = None
    ):
        self.stepsize_continuous_parameters = stepsize_continuous_parameters

    def arm(self, data: ExperimentData):
        """Precompute the grid values of every input parameter in ``data``.

        The result is stored in ``self._iterdict`` (parameter name -> iterable
        of values) for later use by ``call``.

        Raises
        ------
        ValueError
            If the domain has continuous parameters but no step sizes were
            given, or if a continuous parameter has no step size.
        """
        # Split the domain by parameter kind; each kind is gridded differently.
        continuous = data.domain.continuous
        discrete = data.domain.discrete
        categorical = data.domain.categorical
        constant = data.domain.constant

        self._iterdict = {}

        if continuous.input_space:
            stepsizes = self.stepsize_continuous_parameters
            if stepsizes is None:
                # Without this check the ``.items()`` call below fails with
                # an uninformative AttributeError on ``None``.
                raise ValueError(
                    "The domain contains continuous parameters, but no "
                    "'stepsize_continuous_parameters' were provided."
                )

            missing = [
                name for name in continuous.input_space if name not in stepsizes
            ]
            if missing:
                # A parameter without a step size would be dropped here and
                # only surface later as a KeyError when ``call`` selects the
                # input columns.
                raise ValueError(
                    "No step size given for continuous parameter(s): "
                    f"{missing}"
                )

            # NOTE(review): ``to_discrete`` is assumed to return a parameter
            # exposing ``lower_bound``, ``upper_bound`` and ``step`` -- those
            # attributes are read below.
            discrete_space = {
                key: continuous.input_space[key].to_discrete(step=value)
                for key, value in stepsizes.items()
            }
            continuous = Domain(input_space=discrete_space)

        for k, v in categorical.input_space.items():
            self._iterdict[k] = v.categories

        for k, v in discrete.input_space.items():
            # ``+ 1`` makes the upper bound inclusive for integer parameters.
            self._iterdict[k] = range(v.lower_bound, v.upper_bound + 1, v.step)

        for k, v in continuous.input_space.items():
            # np.arange uses a half-open interval: the upper bound itself is
            # not part of the grid for discretised continuous parameters.
            self._iterdict[k] = np.arange(
                start=v.lower_bound, stop=v.upper_bound, step=v.step
            )

        for k, v in constant.input_space.items():
            # A constant contributes a single value to every combination.
            self._iterdict[k] = [v.value]

    def call(self, data: ExperimentData) -> ExperimentData:
        """Return a new ``ExperimentData`` holding the full grid as input data.

        Requires ``arm`` to have been called first, since it reads
        ``self._iterdict``.
        """
        df = pd.DataFrame(
            list(product(*self._iterdict.values())),
            columns=list(self._iterdict),
            dtype=object,
        )[data.domain.input_names]  # reorder columns to the domain's order
        return ExperimentData(domain=data.domain, input_data=df)
We will now sample the domain using the grid sampler we implemented.
- First, we will create a domain with a mix of continuous, discrete, and categorical parameters to test our implementation.
# Build a test domain with one float, one integer and one categorical parameter.
domain = Domain()
domain.add_float("param_1", -1.0, 1.0)
domain.add_int("param_2", 1, 5)
domain.add_category("param_3", ["red", "blue", "green", "yellow", "purple"])
- We create an `ExperimentData` object with the domain:
# Wrap the domain in an ExperimentData object; no input data is passed yet.
experiment_data = ExperimentData(domain=domain)
- Then, we can create a `GridSampler` block object:
# The continuous parameter "param_1" is discretised with a step size of 0.1.
grid_sampler = GridSampler(stepsize_continuous_parameters={"param_1": 0.1})
- We `arm` the sampler with the `ExperimentData` object:
# Arming lets the sampler precompute the grid values of each parameter.
grid_sampler.arm(experiment_data)
- Lastly, we call the grid sampler with the created `ExperimentData`:
# Generate the grid; the returned ExperimentData replaces the previous object.
experiment_data = grid_sampler.call(experiment_data)
# Bare expression -- presumably the last line of a notebook cell, so the
# object is displayed (see the output below).
experiment_data
jobs input
param_1 param_2 param_3
0 open -1.0 1 red
1 open -0.9 1 red
2 open -0.8 1 red
3 open -0.7 1 red
4 open -0.6 1 red
.. ... ... ... ...
495 open 0.5 5 purple
496 open 0.6 5 purple
497 open 0.7 5 purple
498 open 0.8 5 purple
499 open 0.9 5 purple
[500 rows x 4 columns]
Next: Building a Pipeline