{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# MaldiAMRKit - Quick Start\n", "\n", "This notebook covers the basics of loading, preprocessing, and binning MALDI-TOF spectra." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you haven't installed the package yet, run:\n", "```bash\n", "pip install maldiamrkit\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import MaldiAMRKit" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2026-02-08T15:04:26.072070Z", "iopub.status.busy": "2026-02-08T15:04:26.071970Z", "iopub.status.idle": "2026-02-08T15:04:26.684663Z", "shell.execute_reply": "2026-02-08T15:04:26.684010Z" } }, "outputs": [], "source": "from maldiamrkit import MaldiSet, MaldiSpectrum\nfrom maldiamrkit.filters import DrugFilter, MetadataFilter, SpeciesFilter\nfrom maldiamrkit.preprocessing import (\n ClipNegatives,\n LogTransform,\n MedianNormalizer,\n MzTrimmer,\n PreprocessingPipeline,\n SavitzkyGolaySmooth,\n SNIPBaseline,\n SpectrumQuality,\n estimate_snr,\n)\nfrom maldiamrkit.susceptibility import LabelEncoder\nfrom maldiamrkit.visualization import plot_pseudogel, plot_spectrum" }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocessing Pipeline\n", "\n", "Inspect the default composable preprocessing pipeline." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2026-02-08T15:04:26.686879Z", "iopub.status.busy": "2026-02-08T15:04:26.686678Z", "iopub.status.idle": "2026-02-08T15:04:26.690296Z", "shell.execute_reply": "2026-02-08T15:04:26.689889Z" } }, "outputs": [ { "data": { "text/plain": [ "PreprocessingPipeline([\n", " ('clip', ClipNegatives()),\n", " ('sqrt', SqrtTransform()),\n", " ('smooth', SavitzkyGolaySmooth(window_length=20, polyorder=2)),\n", " ('baseline', SNIPBaseline(half_window=40)),\n", " ('trim', MzTrimmer(mz_min=2000, mz_max=20000)),\n", " ('normalize', TICNormalizer())\n", "])" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipe = PreprocessingPipeline.default()\n", "pipe" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load and Preprocess a Single Spectrum" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2026-02-08T15:04:26.708633Z", "iopub.status.busy": "2026-02-08T15:04:26.708402Z", "iopub.status.idle": "2026-02-08T15:04:34.807856Z", "shell.execute_reply": "2026-02-08T15:04:34.807111Z" } }, "outputs": [], "source": "# Load, preprocess (smoothing, baseline removal, normalization), and bin\nspec = MaldiSpectrum(\"../data/1s.txt\").preprocess()\nspec.bin(3) # bin width 3 Da\n\n# Plot the binned spectrum\n_ = plot_spectrum(spec, stage=\"binned\")" }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Verbose Mode\n", "\n", "Enable verbose mode to see processing messages." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2026-02-08T15:04:34.809508Z", "iopub.status.busy": "2026-02-08T15:04:34.809234Z", "iopub.status.idle": "2026-02-08T15:04:34.831421Z", "shell.execute_reply": "2026-02-08T15:04:34.830726Z" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | mass | \n", "intensity | \n", "
|---|---|---|
| 0 | \n", "2000 | \n", "0.000039 | \n", "
| 1 | \n", "2003 | \n", "0.000041 | \n", "
| 2 | \n", "2006 | \n", "0.000083 | \n", "
| 3 | \n", "2009 | \n", "0.000123 | \n", "
| 4 | \n", "2012 | \n", "0.000109 | \n", "
| ... | \n", "... | \n", "... | \n", "
| 5995 | \n", "19985 | \n", "0.000087 | \n", "
| 5996 | \n", "19988 | \n", "0.000054 | \n", "
| 5997 | \n", "19991 | \n", "0.000048 | \n", "
| 5998 | \n", "19994 | \n", "0.000050 | \n", "
| 5999 | \n", "19997 | \n", "0.000012 | \n", "
6000 rows × 2 columns
\n", "| \n", " | Drug | \n", "
|---|---|
| 10s | \n", "S | \n", "
| 11s | \n", "R | \n", "
| 12s | \n", "R | \n", "
| 13s | \n", "S | \n", "
| 14s | \n", "S | \n", "