Pre-annotate data with EDS-NLP
This tutorial shows how to pre-annotate date spans with EDS-NLP and add a boolean mammography flag when the date appears close to a mammography mention.
The goal is to expose this flag in Metanno as a checkbox in the entity widgets.
Build a contextual date pipeline
Use eds.dates to detect dates, then eds.contextual_matcher to enrich each date with an assigned value when a mammography term appears in the local context.
import edsnlp
import edsnlp.pipes as eds
# Start from a blank EDS pipeline and add the components the matcher relies on.
nlp = edsnlp.blank("eds")
nlp.add_pipe(eds.sentences())
nlp.add_pipe(eds.normalizer())
nlp.add_pipe(eds.dates())

# Assignment rule: look for a mammography term in a window of 10 words on each
# side of the date, restricted to the same sentence, keeping the first match.
mammography_assign = {
    "name": "mammography",
    "regex": "(mammo-?graph)",
    "window": "words[-10:10] & sent",
    "reduce_mode": "keep_first",
}

# The pattern reuses the spans found in "dates" as anchors instead of matching
# new text, and attaches the assignment rule above to each of them.
date_pattern = {
    "span_getter": "dates",
    "assign": [mammography_assign],
    "source": "mammography",
}

# The trailing semicolon only silences the notebook output of this last call.
nlp.add_pipe(eds.contextual_matcher(patterns=[date_pattern], label="date"));
Run it on some data first to ensure it works:
# A single sample note: the date sits right after a mammography mention.
texts = [
    "Le patient a eu une mammographie le 12 juin. Aucune autre mammographie prévue.",
]

# Export the entities with their "assigned" and "date" attributes for inspection.
nlp.pipe(texts).to_pandas(
    converter="ents",
    span_attributes=["assigned", "date"],
)
| | note_id | start | end | label | lexical_variant | span_type | assigned | date |
|---|---|---|---|---|---|---|---|---|
| 0 | None | 36 | 43 | date | 12 juin | ents | {'mammography': 'mammographie'} | ????-06-12 |
Tip
If you only want dates with nearby mammography context, set required=True in the assign block of eds.contextual_matcher.
Apply the pipeline to some data
We'll assume you have a collection of documents you want to apply the pipeline to.
The example below shows how to load parquet data. Visit EDS-NLP's Data API docs to learn about other formats.
import uuid
def build_data():
    """Run the pipeline on the notes and assemble the data shown in the widgets.

    Returns:
        A dict with a single "notes" key holding one dict per document, with
        the keys "note_id", "note_text", "seen" and "entities". Each entity
        dict describes one "date" entity (character offsets, text, label) and
        carries a boolean "mammography" flag that is True when the entity's
        `assigned` attribute holds a truthy "mammography" value.
    """
    # Apply the pipeline
    # data = edsnlp.data.read_parquet(path_to_parquet_dataset)
    # or, if you don't have data yet, comment the above and run
    data = edsnlp.data.from_iterable(
        [{"note_id": f"#{i}", "note_text": t} for i, t in enumerate(texts)],
        converter="omop",
    )
    data = data.map_pipeline(nlp)

    # And assemble your data in collections of dicts
    notes = []
    for doc in data:
        note_entities = [
            {
                # Random identifier, used as the primary key of the span
                "id": f"#{uuid.uuid4()}",
                "text": str(e),
                "begin": e.start_char,
                "end": e.end_char,
                "label": "date",
                "concept": None,
                # `assigned` may be empty or None when nothing matched nearby
                "mammography": bool((e._.assigned or {}).get("mammography")),
            }
            for e in doc.ents  # or doc.spans[...] if this is where your entities are
            if e.label_ == "date"
        ]
        notes.append(
            {
                "note_id": str(doc._.note_id),
                "note_text": doc.text,
                "seen": False,
                "entities": note_entities,
            }
        )
    return {"notes": notes}
Build the widgets (note text + info form)
Create a DataWidgetFactory, then build:
- A note text view with editable entity fields (including mammography).
- An info view similar to Quaero: note form (with navigation buttons) + selected entity info.
from pret_joy import Box, Divider, Stack
from metanno.recipes.data_widget_factory import DataWidgetFactory, infer_fields
# Every widget below is created from this single factory.
factory = DataWidgetFactory(
    data=build_data,
    # sync=True/path, enable sync to sync the user edits with the kernel/server
)

# Gather the entities of all notes so the field definitions can be inferred
# from them.
all_entities = []
for note in factory.data["notes"]:
    all_entities.extend(note["entities"])

entity_fields = infer_fields(
    all_entities,
    visible_keys=["label", "mammography"],
    editable_keys=["label", "mammography"],
    categorical_keys=["label"],
)

# Display settings for the only label used in this tutorial.
label_config = {
    "date": {"name": "Date", "color": "lightblue", "shortcut": "d"},
}

# Note text view, plus the view for the entity fields.
note_text_view, ent_view = factory.create_text_widget(
    store_text_key="notes",
    store_spans_key="notes.entities",
    text_key="note_text",
    text_primary_key="note_id",
    spans_primary_key="id",
    fields=entity_fields,
    labels=label_config,
)

# Note-level form: the note id, and an editable "seen" checkbox.
note_form_fields = [
    {"key": "note_id", "kind": "text"},
    {"key": "seen", "kind": "boolean", "editable": True},
]
note_form_view = factory.create_form_widget(
    store_key="notes",
    primary_key="note_id",
    fields=note_form_fields,
    add_navigation_buttons=True,
)

# Stack the note form above the entity view, separated by a divider.
info_view = Stack(
    Box(note_form_view, sx={"m": "10px"}),
    Divider(),
    Box(ent_view, sx={"m": "10px"}),
)
mammography is inferred as a boolean field, so it is rendered as a checkbox/toggle editor.
You can now either view these widgets separately, or arrange them in a single layout like we did in the Run the Quaero Explorer tutorial.
from pret.react import div
from pret_simple_dock import Layout, Panel
# Header of the text panel: shows the "note_id" field, or "Note" as a fallback.
note_header = factory.create_selected_field_view(
    store_key="notes",
    shown_key="note_id",
    fallback="Note",
)

# Initial docking arrangement: a row with two tab groups sized 65 and 35.
dock_config = {
    "kind": "row",
    "children": [
        {"tabs": ["Note Text"], "size": 65},
        {"tabs": ["Info"], "size": 35},
    ],
}

# Styling of the container that wraps the dock layout.
container_style = {
    "background": "var(--joy-palette-background-level2, #f0f0f0)",
    "width": "100%",
    "height": "100%",
    "minHeight": "420px",
    "--sd-background-color": "transparent",
}

layout = div(
    Layout(
        Panel(note_text_view, key="Note Text", header=note_header),
        Panel(info_view, key="Info"),
        default_config=dock_config,
    ),
    style=container_style,
)

# Leave the layout as the last expression so the notebook displays it.
layout