Skip to content

raw

convert_raw(source, ini_path=None, xml_path=None, out=None, recursive=False, include=None, exclude=None, compression=None, compression_opts=None, generate_timestamps=False, force=False)

Converts RAW data and metadata to HDF5.

Note that SOURCE can be either a single file or a directory. If it is a directory, all the RAW files it contains will be converted.

If '--ini-path' is not provided, it will default to the same path as the source file with the extension changed to '.ini'. If '--xml-path' is not provided, it will default to the same path as the source file with the extension changed to '.xml', and the 'XYT' ending changed to 'OME'. Note the OME-XML path is optional if the INI file contains the OME-XML as an entry.

If generate_timestamps is set, a .notes.txt file with the same name as the RAW file should also be present.

Parameters:

Name Type Description Default
source Path

Source file or directory to convert. If a directory, the default is to look for RAW files inside of it without recursion.

required
ini_path Path | None

Path to the INI file containing metadata about SOURCE. This is ignored if SOURCE is a directory.

None
xml_path Path | None

Path to the XML file containing metadata about SOURCE. This is ignored if SOURCE is a directory.

None
out Path | None

Optional output directory for converted files.

None
recursive bool

Whether to search directories recursively when looking for RAW files.

False
include str | None

Include filters to apply when searching for RAW files. Regular expressions are supported. Include filters are applied before any exclude filters.

None
exclude str | None

Exclude filters to apply when searching for RAW files. Regular expressions are supported. Exclude filters are applied after all include filters.

None
compression COMPRESSION | None

Compression algorithm to use.

None
compression_opts int | None

Compression options to use with the given algorithm.

None
generate_timestamps bool

Whether to generate timestamps from the notes entries of the RAW files. A ".notes.txt" file should be present alongside the RAW file when this is set.

False
force bool

Whether to overwrite output files if they exist.

False
Source code in src/drim2p/convert/raw.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
def convert_raw(
    source: pathlib.Path,
    ini_path: pathlib.Path | None = None,
    xml_path: pathlib.Path | None = None,
    out: pathlib.Path | None = None,
    recursive: bool = False,
    include: str | None = None,
    exclude: str | None = None,
    compression: io.COMPRESSION | None = None,
    compression_opts: int | None = None,
    generate_timestamps: bool = False,
    force: bool = False,
) -> None:
    """Converts RAW data and metadata to HDF5.

    Note that SOURCE can be either a single file or a directory. If it is a directory,
    all the RAW files it contains will be converted.

    If '--ini-path' is not provided, it will default to the same path as the source file
    with the extension changed to '.ini'.
    If '--xml-path' is not provided, it will default to the same path as the source file
    with the extension changed to '.xml', and the 'XYT' ending changed to 'OME'. Note
    the OME-XML path is optional if the INI file contains the OME-XML as an entry.

    If `generate_timestamps` is set, a `.notes.txt` file with the same name as the RAW
    file should also be present.

    Each RAW file is written to an HDF5 file with the same stem and a '.h5' extension,
    either next to the RAW file or inside `out` when it is provided. The array is
    stored in a "data" dataset carrying the INI metadata as attributes and, when
    requested, the timestamps are stored in a sibling "timestamps" dataset.

    Files whose OME-XML metadata cannot be found are skipped with an error log rather
    than raising. Likewise, a missing parent for `out` aborts the whole conversion with
    a log entry instead of an exception.

    Args:
        source (pathlib.Path):
            Source file or directory to convert. If a directory, the default is to look
            for RAW files inside of it without recursion.
        ini_path (pathlib.Path | None, optional):
            Path to the INI file containing metadata about SOURCE. This is ignored if
            SOURCE is a directory.
        xml_path (pathlib.Path | None, optional):
            Path to the XML file containing metadata about SOURCE. This is ignored if
            SOURCE is a directory.
        out (pathlib.Path | None, optional):
            Optional output directory for converted files.
        recursive (bool, optional):
            Whether to search directories recursively when looking for RAW files.
        include (str | None, optional):
            Include filters to apply when searching for RAW files. This supports
            regular expressions. Include filters are applied before any exclude filters.
        exclude (str | None, optional):
            Exclude filters to apply when searching for RAW files. This supports
            regular expressions. Exclude filters are applied after all include filters.
        compression (io.COMPRESSION | None, optional): Compression algorithm to use.
        compression_opts (int | None, optional):
            Compression options to use with the given algorithm.
        generate_timestamps (bool, optional):
            Whether to generate timestamps from the notes entries of the RAW files. A
            ".notes.txt" file should be present alongside the RAW file when this is set.
        force (bool, optional): Whether to overwrite output files if they exist.
    """
    # Collect RAW file paths to convert
    raw_paths = io.find_paths(source, [".raw"], include, exclude, recursive, True)

    # If we are going to process at least a file, ensure the output directory exists
    if len(raw_paths) > 0 and out is not None:
        # Only allow creating a directory inside an existing parent. We should only
        # support creating a single directory, not a nested hierarchy as a single typo
        # in a path can result in a lot of folders being created in a way a user might
        # not expect.
        try:
            out.mkdir(exist_ok=True)
        except FileNotFoundError:
            _logger.exception(
                f"Neither provided output directory '{out}' nor its parent exist. "
                f"Aborting."
            )
            return

    # Ignore ini_path and xml_path if we are working with a directory
    if source.is_dir():
        ini_path = None
        xml_path = None

    for path in raw_paths:
        # Shortcircuit early if we won't write
        out_path = (
            out / path.with_suffix(".h5").name
            if out is not None
            else path.with_suffix(".h5")
        )
        if out_path.exists() and not force:
            _logger.info(
                f"Skipping '{path}' as it already exists and --force is not set."
            )
            continue

        _logger.info(f"Converting '{path}'.")

        # Retrieve INI metadata. A missing or unparsable INI file is not fatal: we
        # carry on with empty metadata and rely on the standalone XML file instead.
        ini_metadata_path = ini_path or path.with_suffix(".ini")
        ini_metadata = {}
        if ini_metadata_path.exists():
            try:
                ini_metadata = raw_io.parse_metadata_from_ini(
                    ini_metadata_path, typed=True
                )
            except ValueError as e:
                _logger.warning(e)
        else:
            _logger.debug(f"No INI metadata found for '{path}'.")

        # Retrieve XML metadata, preferring the copy embedded in the INI file
        xml_string = None
        if ini_metadata:
            xml_string = ini_metadata.get("ome.xml.string")
            if xml_string is None:
                _logger.debug(
                    "Failed to retrieve XML metadata from INI metadata. Trying to use "
                    "the XML file directly."
                )
            else:
                _logger.debug("Using XML string from INI file.")

        if xml_string is None:
            xml_metadata_path = xml_path or _find_xml_path(path)
            if xml_metadata_path is None or not xml_metadata_path.exists():
                _logger.debug(f"No XML metadata found for '{path}'.")
                _logger.error(
                    f"Failed to retrieve OME-XML metadata from INI file or directly "
                    f"through XML file for '{path}', skipping RAW file. "
                    f"To use the XML file, make sure it has the same file name as the "
                    f"RAW file with the '.xml' or '.ome.xml' extension(s)."
                )
                continue

            _logger.debug("Using XML string from XML file.")
            # `read_text` closes the file once read, unlike a bare `open().read()`
            xml_string = xml_metadata_path.read_text()

        shape, dtype = raw_io.parse_metadata_from_ome(xml_string)

        # Generate timestamps if requested
        timestamps = None
        if generate_timestamps:
            timestamps = _generate_timestamps(path, ini_metadata)

        # Convert RAW to numpy
        _logger.debug(f"Reading as array using metadata: {shape=}, {dtype=}.")
        array = raw_io.read_raw_as_numpy(path, shape, dtype)

        # Output as HDF5. The resolved parameters are kept in their own local names
        # so the caller's `compression`/`compression_opts` are passed unchanged for
        # every file instead of feeding the previous iteration's output back in.
        h5_compression, h5_compression_opts, shuffle = (
            io.get_h5py_compression_parameters(compression, compression_opts)
        )

        _logger.debug(
            f"Writing HDF5 to '{out_path}' "
            f"(compression={h5_compression!r}, "
            f"compression_opts={h5_compression_opts!r}, shuffle={shuffle!r})."
        )
        with h5py.File(out_path, "w") as handle:
            dataset = handle.create_dataset(
                "data",
                data=array,
                # Chunk per frame, same for writing but speeds up reading a lot
                chunks=(1, *shape[1:]),
                compression=h5_compression,
                compression_opts=h5_compression_opts,
                shuffle=shuffle,
            )

            for key, value in ini_metadata.items():
                dataset.attrs[key] = value

            if timestamps is not None:
                handle.create_dataset(
                    "timestamps",
                    data=timestamps,
                    compression=h5_compression,
                    compression_opts=h5_compression_opts,
                    shuffle=shuffle,
                )

        _logger.info(f"Finished converting '{path}'.")

convert_raw_command(**kwargs)

Converts RAW data and metadata to HDF5.

Note that SOURCE can be either a single file or a directory. If it is a directory, all the RAW files it contains will be converted.

If '--ini-path' is not provided, it will default to the same path as the source file with the extension changed to '.ini'. If '--xml-path' is not provided, it will default to the same path as the source file with the extension changed to '.xml', and the 'XYT' ending changed to 'OME'. Note the OME-XML path is optional if the INI file contains the OME-XML as an entry.

If generate_timestamps is set, a .notes.txt file with the same name as the RAW file should also be present.

Source code in src/drim2p/convert/raw.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Command-line entry point: exposes `convert_raw` as the click command "raw".
# Each option below maps onto the `convert_raw` keyword argument of the same name
# (with '--aggression' explicitly mapped to `compression_opts`).
@click.command("raw")
# SOURCE is declared optional at the click level; when given it must be an existing,
# readable file or directory and is converted to a `pathlib.Path`.
# NOTE(review): `cli_utils.noop_if_missing` is not visible here; presumably it turns
# the command into a no-op when SOURCE is omitted - confirm against `cli_utils`.
@click.argument(
    "source",
    required=False,
    type=click.Path(
        exists=True,
        file_okay=True,
        dir_okay=True,
        readable=True,
        path_type=pathlib.Path,
    ),
    callback=cli_utils.noop_if_missing,
)
# Explicit INI metadata file; must be an existing file (directories are rejected).
@click.option(
    "--ini-path",
    required=False,
    type=click.Path(
        exists=True,
        file_okay=True,
        dir_okay=False,
        path_type=pathlib.Path,
    ),
    help=(
        "Path to the INI file containing metadata about SOURCE. "
        "This is ignored if SOURCE is a directory."
    ),
)
# Explicit OME-XML metadata file; must be an existing file (directories are rejected).
@click.option(
    "--xml-path",
    required=False,
    type=click.Path(
        exists=True,
        file_okay=True,
        dir_okay=False,
        path_type=pathlib.Path,
    ),
    help=(
        "Path to the OME-XML file containing metadata about SOURCE. "
        "This is ignored if SOURCE is a directory."
    ),
)
# Output directory. It does not have to exist yet (`exists=False`), but a path that
# points at a file is rejected (`file_okay=False`).
@click.option(
    "-o",
    "--out",
    required=False,
    type=click.Path(
        exists=False,
        file_okay=False,
        dir_okay=True,
        writable=True,
        path_type=pathlib.Path,
    ),
    help=(
        "Output directory in which to put the converted files. "
        "Default is to output in the same directory as SOURCE."
    ),
)
# Boolean flag: False unless '-r'/'--recursive' is passed.
@click.option(
    "-r",
    "--recursive",
    required=False,
    is_flag=True,
    help="Whether to search directories recursively when looking for RAW files.",
)
# Include filter, passed through as a plain string (None when omitted).
@click.option(
    "-i",
    "--include",
    required=False,
    default=None,
    help=(
        "Include filters to apply when searching for RAW files. "
        "This supports regular-expressions. Include filters are applied before any "
        "exclude filters."
    ),
)
# Exclude filter, passed through as a plain string (None when omitted).
@click.option(
    "-e",
    "--exclude",
    required=False,
    default=None,
    help=(
        "Exclude filters to apply when searching for RAW files. "
        "This supports regular-expressions. Exclude filters are applied after all "
        "include filters."
    ),
)
# Compression algorithm, restricted to the values listed by `io.COMPRESSION` and
# matched case-insensitively. The callback lower-cases whatever was chosen so the
# rest of the code only ever sees the lower-case spelling (or None when omitted).
@click.option(
    "-c",
    "--compression",
    required=False,
    type=click.Choice(get_args(io.COMPRESSION), case_sensitive=False),
    default=None,
    callback=lambda _, __, x: x if x is None else x.lower(),
    help="Compression algorithm to use.",
)
# '--aggression' is stored under the `compression_opts` parameter name. Note the CLI
# default is 4 and the accepted range is 0-9, whereas calling `convert_raw` directly
# defaults `compression_opts` to None.
@click.option(
    "--aggression",
    "compression_opts",
    required=False,
    type=click.IntRange(0, 9),
    default=4,
    help=(
        "Aggression level to use for GZIP compression. Lower means faster/worse "
        "compression, higher means slower/better compression. Ignored if "
        "'--compression' is not GZIP."
    ),
)
# Boolean flag: False unless '--generate-timestamps' is passed.
@click.option(
    "--generate-timestamps",
    required=False,
    is_flag=True,
    help="Whether to generate timestamps from the notes entries of the RAW files.",
)
# Boolean flag: False unless '--force' is passed.
@click.option(
    "--force",
    required=False,
    is_flag=True,
    help="Whether to overwrite output files if they exist.",
)
def convert_raw_command(**kwargs: Any) -> None:
    # The docstring below doubles as the command's help text in click, so it is
    # user-facing output and is written in terms of CLI options.
    """Converts RAW data and metadata to HDF5.

    Note that SOURCE can be either a single file or a directory. If it is a directory,
    all the RAW files it contains will be converted.

    If '--ini-path' is not provided, it will default to the same path as the source file
    with the extension changed to '.ini'.
    If '--xml-path' is not provided, it will default to the same path as the source file
    with the extension changed to '.xml', and the 'XYT' ending changed to 'OME'. Note
    the OME-XML path is optional if the INI file contains the OME-XML as an entry.

    If `generate_timestamps` is set, a `.notes.txt` file with the same name as the RAW
    file should also be present.
    """
    # Forward every parsed CLI parameter by keyword to the library function.
    convert_raw(**kwargs)

generate_timestamps_for_note_entry(entry, frame_count)

Generates a timestamps series for a given notes entry and a frame count.

Parameters:

Name Type Description Default
entry NotesEntry

Entry for which to generate timestamps.

required
frame_count int

Integer count of the frames for the given entry.

required

Returns:

Type Description
ndarray[Any, dtype[number]]

A series of timestamps for each frame.

Source code in src/drim2p/convert/raw.py
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
def generate_timestamps_for_note_entry(
    entry: models.NotesEntry, frame_count: int
) -> np.ndarray[Any, np.dtype[np.number]]:
    """Generates a timestamps series for a given notes entry and a frame count.

    The entry's `timedelta_ms` is split into `frame_count` equal steps. Timestamps
    start at 0 and the end of the interval is excluded, so the last timestamp is one
    step short of `timedelta_ms`.

    Args:
        entry (models.NotesEntry): Entry for which to generate timestamps.
        frame_count (int): Integer count of the frames for the given entry.

    Returns:
        A series of timestamps for each frame. The series is empty when `frame_count`
        is zero or negative.
    """
    # Without frames there is nothing to timestamp, and guarding here also avoids
    # dividing by zero below.
    if frame_count <= 0:
        return np.array([])

    # NOTE(review): assumes `timedelta_ms` is a plain number - confirm against
    # `models.NotesEntry`.
    frame_spacing = entry.timedelta_ms / frame_count

    return np.arange(frame_count) * frame_spacing