Implementing a grid search sampler from scratch

In this example, we will implement a grid search sampler from scratch. The grid search sampler is a simple sampler that evaluates all possible combinations of the parameters in the domain. This is useful for small domains, but it can become computationally expensive for larger domains. We will show how to create this sampler and use it in a f3dasm data-driven experiment.

from __future__ import annotations

from itertools import product
from typing import Optional

import numpy as np
import pandas as pd

from f3dasm import Block, ExperimentData
from f3dasm.design import Domain

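To integrate your sampling strategy into the data-driven process, create a new class that inherits from the Block base class. In the implementation below, the arm method prepares the list of grid values for every parameter in the domain, and the call method builds every combination of those values.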

class GridSampler(Block):
    """Sampler block that enumerates every combination of parameter values.

    Categorical, discrete and constant parameters contribute their own
    values. Continuous parameters are first discretized with a user-supplied
    step size, so every continuous parameter in the domain needs an entry in
    ``stepsize_continuous_parameters``.

    Usage: call :meth:`arm` with the experiment data first, then
    :meth:`call` to generate the samples.
    """

    def __init__(
        self, stepsize_continuous_parameters: Optional[dict[str, float]] = None
    ):
        """Store the step sizes used to discretize continuous parameters.

        Parameters
        ----------
        stepsize_continuous_parameters
            Mapping from continuous parameter name to grid step size.
            May be ``None`` when the domain has no continuous parameters.
        """
        self.stepsize_continuous_parameters = stepsize_continuous_parameters

    def arm(self, data: ExperimentData):
        """Precompute the grid values of every parameter in ``data.domain``.

        The values are stored per parameter name in ``self._iterdict`` and
        consumed by :meth:`call`.

        Parameters
        ----------
        data
            Experiment data whose domain defines the parameters to grid.

        Raises
        ------
        ValueError
            If the domain has continuous parameters for which no step size
            was provided.
        KeyError
            If a step size is given for a name that is not a continuous
            parameter of the domain (only checked when the domain has
            continuous parameters).
        """
        # Split the domain into one sub-domain per parameter kind
        domain = data.domain
        continuous = domain.continuous
        discrete = domain.discrete
        categorical = domain.categorical
        constant = domain.constant

        self._iterdict = {}

        if continuous.input_space:
            stepsizes = self.stepsize_continuous_parameters or {}

            # Fail early with a clear message instead of crashing on `None`
            # or silently dropping parameters that would be missed in call()
            missing = [
                name for name in continuous.input_space
                if name not in stepsizes
            ]
            if missing:
                raise ValueError(
                    "No step size provided for continuous parameter(s): "
                    f"{', '.join(map(str, missing))}. Pass them via "
                    "'stepsize_continuous_parameters'."
                )

            discrete_space = {
                key: continuous.input_space[key].to_discrete(step=value)
                for key, value in stepsizes.items()
            }

            # From here on `continuous` holds the discretized parameters
            continuous = Domain(input_space=discrete_space)

        for name, parameter in categorical.input_space.items():
            self._iterdict[name] = parameter.categories

        for name, parameter in discrete.input_space.items():
            # `+ 1` makes the upper bound inclusive
            self._iterdict[name] = range(
                parameter.lower_bound, parameter.upper_bound + 1, parameter.step
            )

        for name, parameter in continuous.input_space.items():
            # np.arange uses a half-open interval: the upper bound itself
            # is not part of the grid
            self._iterdict[name] = np.arange(
                start=parameter.lower_bound,
                stop=parameter.upper_bound,
                step=parameter.step,
            )

        for name, parameter in constant.input_space.items():
            self._iterdict[name] = [parameter.value]

    def call(self, data: ExperimentData) -> ExperimentData:
        """Build the full grid as a new ``ExperimentData`` object.

        Requires :meth:`arm` to have been called, since it reads the values
        stored in ``self._iterdict``.

        Parameters
        ----------
        data
            Experiment data providing the domain (and column order) of the
            samples.

        Returns
        -------
        ExperimentData
            New object on the same domain whose input data holds one row
            per combination of the precomputed parameter values.
        """
        # Cartesian product of all per-parameter value lists
        combinations = list(product(*self._iterdict.values()))

        df = pd.DataFrame(
            combinations,
            columns=list(self._iterdict),
            dtype=object,
        )[data.domain.input_names]  # reorder columns to the domain's order

        return ExperimentData(domain=data.domain, input_data=df)

We will now sample the domain using the grid sampler we implemented. First, we will create a domain with a mix of continuous, discrete, and categorical parameters to test our implementation.

# Start from an empty domain and register one parameter of each kind.
domain = Domain()
# Continuous parameter (the two numbers are presumably lower and upper bound).
domain.add_float("param_1", -1.0, 1.0)
# Integer (discrete) parameter (presumably ranging from 1 to 5).
domain.add_int("param_2", 1, 5)
# Categorical parameter with five possible values.
domain.add_category("param_3", ["red", "blue", "green", "yellow", "purple"])

We create an ExperimentData object with the domain:

# Only the domain is passed here; the samples are generated by the sampler below.
experiment_data = ExperimentData(domain=domain)

Then, we can create a GridSampler block object:

# "param_1" is the only continuous parameter, so it is the only one that
# needs a step size for discretization.
grid_sampler = GridSampler(stepsize_continuous_parameters={"param_1": 0.1})

We arm the sampler with the ExperimentData object:

# arm() reads the domain and precomputes the grid values of every parameter;
# it must run before call().
grid_sampler.arm(experiment_data)

Lastly, we call the grid sampler with the created ExperimentData:

# call() returns a new ExperimentData holding every combination of the grid
# values; rebind the name to the sampled result.
experiment_data = grid_sampler.call(experiment_data)
# Bare expression: in a notebook this displays the object as the cell output.
experiment_data

jobs   input                
          param_1 param_2 param_3
0    open    -1.0       1     red
1    open    -0.9       1     red
2    open    -0.8       1     red
3    open    -0.7       1     red
4    open    -0.6       1     red
..    ...     ...     ...     ...
495  open     0.5       5  purple
496  open     0.6       5  purple
497  open     0.7       5  purple
498  open     0.8       5  purple
499  open     0.9       5  purple

[500 rows x 4 columns]

Next: Building a Pipeline