Skip to content

glazing.framenet.search

Searching FrameNet data.

search

FrameNet search functionality.

This module provides search capabilities for FrameNet data, including frame searches by name and definition, frame element searches across frames, and lexical unit pattern matching.

CLASS DESCRIPTION
FrameNetSearch

Search interface for FrameNet frame lookups.

Classes

FrameNetSearch(frames: list[Frame] | None = None)

Search interface for FrameNet frame lookups.

Provides search methods for frames, frame elements, and lexical units with pattern matching and cross-frame queries.

PARAMETER DESCRIPTION
frames

Initial frames to index. If None, creates empty index.

TYPE: list[Frame] | None DEFAULT: None

ATTRIBUTE DESCRIPTION
_frames_by_id

Mapping from frame ID to frame object.

TYPE: dict[FrameID, Frame]

_frames_by_name

Mapping from frame name to frame object.

TYPE: dict[str, Frame]

_frames_by_lemma

Mapping from lemma to the IDs of frames evoked by that lemma.

TYPE: dict[str, set[FrameID]]

_fe_index

Mapping from FE name to the IDs of frames containing that FE.

TYPE: dict[str, set[FrameID]]

_lu_index

Mapping from lemma to (frame ID, lexical unit ID) pairs for that lemma.

TYPE: dict[str, set[tuple[FrameID, int]]]

METHOD DESCRIPTION
add_frame

Add a frame to the index.

get_frame_by_id

Get frame by ID.

get_frame_by_name

Get frame by name.

search_frames_by_name

Search frames by name pattern.

search_frames_by_definition

Search frames by definition pattern.

find_frames_with_fe

Find frames containing a specific FE.

find_frames_by_lemma

Find frames evoked by a lemma.

search_lexical_units

Search lexical units by pattern.

get_fe_across_frames

Get FE definitions across all frames.

Examples:

>>> search = FrameNetSearch()
>>> search.add_frame(abandonment_frame)
>>> frame = search.get_frame_by_name("Abandonment")
>>> frames = search.search_frames_by_definition("leave.*behind")

Initialize frame index with optional initial frames.

Source code in src/glazing/framenet/search.py
def __init__(self, frames: list[Frame] | None = None) -> None:
    """Create empty lookup tables, then index any frames supplied up front.

    Parameters
    ----------
    frames : list[Frame] | None, optional
        Frames to register immediately through ``add_frame``. ``None`` or
        an empty list leaves every table empty.
    """
    # Frame objects are stored once per key in the two direct lookups.
    self._frames_by_id: dict[FrameID, Frame] = {}
    self._frames_by_name: dict[str, Frame] = {}
    # The lemma, FE and LU tables hold IDs only; defaultdict lets add_frame
    # insert into a key's set without creating it first.
    self._frames_by_lemma: dict[str, set[FrameID]] = defaultdict(set)
    self._fe_index: dict[str, set[FrameID]] = defaultdict(set)
    self._lu_index: dict[str, set[tuple[FrameID, int]]] = defaultdict(set)

    for frame in frames or ():
        self.add_frame(frame)
Functions
add_frame(frame: Frame) -> None

Add a frame to the index.

PARAMETER DESCRIPTION
frame

Frame to add to index.

TYPE: Frame

RAISES DESCRIPTION
ValueError

If frame with same ID already exists.

Source code in src/glazing/framenet/search.py
def add_frame(self, frame: Frame) -> None:
    """Register a frame and index its frame elements and lexical units.

    Parameters
    ----------
    frame : Frame
        Frame to register.

    Raises
    ------
    ValueError
        If a frame with the same ID has already been registered.
    """
    frame_id = frame.id
    if frame_id in self._frames_by_id:
        msg = f"Frame with ID {frame_id} already exists in index"
        raise ValueError(msg)

    self._frames_by_id[frame_id] = frame
    self._frames_by_name[frame.name] = frame

    # Each FE name points back at every frame that declares it.
    for element in frame.frame_elements:
        self._fe_index[element.name].add(frame_id)

    # LU names are expected to look like "lemma.pos". The lemma is whatever
    # precedes the last dot; names without a dot are not indexed at all.
    for unit in frame.lexical_units:
        lemma, dot, _suffix = unit.name.rpartition(".")
        if not dot:
            continue
        self._frames_by_lemma[lemma].add(frame_id)
        self._lu_index[lemma].add((frame_id, unit.id))
by_element_properties(core_type: str | None = None, semantic_type: str | None = None) -> list[Frame]

Find frames by element properties.

PARAMETER DESCRIPTION
core_type

Filter by core type ("Core", "Non-Core", "Extra-Thematic").

TYPE: str | None DEFAULT: None

semantic_type

Filter by semantic type.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
list[Frame]

Frames with matching element properties.

Source code in src/glazing/framenet/search.py
def by_element_properties(
    self, core_type: str | None = None, semantic_type: str | None = None
) -> list[Frame]:
    """Find frames that have at least one element with the given properties.

    Core-type filtering is delegated to ``filter_elements_by_properties``;
    the semantic-type check is applied here to whatever that helper keeps.

    Parameters
    ----------
    core_type : str | None, optional
        Filter by core type ("Core", "Non-Core", "Extra-Thematic").
    semantic_type : str | None, optional
        Filter by semantic type. An element qualifies only if it has a
        ``semantic_type`` attribute equal to this value.

    Returns
    -------
    list[Frame]
        Frames with matching element properties, sorted by frame name.
    """
    hits = []
    for frame in self._frames_by_id.values():
        elements = filter_elements_by_properties(
            frame.frame_elements,
            core_type=core_type,  # type: ignore[arg-type]
        )
        # Nothing survived the core-type filter: the frame cannot match.
        if not elements:
            continue
        # With a semantic type requested, at least one surviving element
        # must carry exactly that semantic type.
        if semantic_type and not any(
            hasattr(element, "semantic_type") and element.semantic_type == semantic_type
            for element in elements
        ):
            continue
        hits.append(frame)

    hits.sort(key=lambda frame: frame.name)
    return hits
by_syntax(pattern: str) -> list[Frame]

Find frames with valence patterns matching a syntactic pattern.

PARAMETER DESCRIPTION
pattern

Syntactic pattern (e.g., "NP V NP", "NP V PP").

TYPE: str

RETURNS DESCRIPTION
list[Frame]

Frames with matching valence patterns.

Source code in src/glazing/framenet/search.py
def by_syntax(self, pattern: str) -> list[Frame]:
    """Find frames with valence patterns matching a syntactic pattern.

    A frame is returned when at least one valence pattern of at least one
    of its lexical units matches. Lexical units without a non-empty
    ``valence_patterns`` attribute are skipped.

    Parameters
    ----------
    pattern : str
        Syntactic pattern (e.g., "NP V NP", "NP V PP").

    Returns
    -------
    list[Frame]
        Frames with matching valence patterns, sorted by frame name.
    """
    parsed_pattern = SyntaxParser().parse(pattern)

    # ``any`` stops at the first matching valence pattern, so every frame is
    # collected at most once and no separate de-duplication pass is needed.
    matching_frames = [
        frame
        for frame in self._frames_by_id.values()
        if any(
            self._valence_matches_pattern(valence_pattern, parsed_pattern)
            for lu in frame.lexical_units
            if hasattr(lu, "valence_patterns") and lu.valence_patterns
            for valence_pattern in lu.valence_patterns
        )
    ]

    return sorted(matching_frames, key=lambda f: f.name)
find_frames_by_lemma(lemma: str, pos: FrameNetPOS | None = None) -> list[Frame]

Find frames evoked by a lemma.

PARAMETER DESCRIPTION
lemma

Lemma to search for.

TYPE: str

pos

If specified, only return frames with LUs of this POS.

TYPE: FrameNetPOS | None DEFAULT: None

RETURNS DESCRIPTION
list[Frame]

Frames evoked by the lemma.

Source code in src/glazing/framenet/search.py
def find_frames_by_lemma(self, lemma: str, pos: FrameNetPOS | None = None) -> list[Frame]:
    """Return the frames evoked by a lemma, optionally restricted by POS.

    Parameters
    ----------
    lemma : str
        Lemma to search for.
    pos : FrameNetPOS | None
        If specified, keep only frames that have a lexical unit for this
        lemma with this POS.

    Returns
    -------
    list[Frame]
        Frames evoked by the lemma, sorted by frame name.
    """
    # ``.get`` avoids inserting an empty entry for unknown lemmas.
    candidates = [self._frames_by_id[fid] for fid in self._frames_by_lemma.get(lemma, set())]

    if pos is not None:
        # The lemma of an LU is the part of its name before the last dot.
        candidates = [
            frame
            for frame in candidates
            if any(
                unit.name.rsplit(".", 1)[0] == lemma and unit.pos == pos
                for unit in frame.lexical_units
                if "." in unit.name
            )
        ]

    candidates.sort(key=lambda frame: frame.name)
    return candidates
find_frames_with_fe(fe_name: str, core_type: CoreType | None = None) -> list[Frame]

Find frames containing a specific frame element.

PARAMETER DESCRIPTION
fe_name

Name of frame element to search for.

TYPE: str

core_type

If specified, only return frames where FE has this core type.

TYPE: CoreType | None DEFAULT: None

RETURNS DESCRIPTION
list[Frame]

Frames containing the specified FE.

Source code in src/glazing/framenet/search.py
def find_frames_with_fe(self, fe_name: str, core_type: CoreType | None = None) -> list[Frame]:
    """Return the frames that declare a frame element with the given name.

    Parameters
    ----------
    fe_name : str
        Name of frame element to search for.
    core_type : CoreType | None
        If specified, keep only frames whose FE of that name has this
        core type.

    Returns
    -------
    list[Frame]
        Frames containing the specified FE, sorted by frame name.
    """
    owners = [self._frames_by_id[fid] for fid in self._fe_index.get(fe_name, set())]

    if core_type is not None:
        kept = []
        for owner in owners:
            element = owner.get_fe_by_name(fe_name)
            # Skip frames whose lookup yields nothing or a different core type.
            if not element or element.core_type != core_type:
                continue
            kept.append(owner)
        owners = kept

    owners.sort(key=lambda frame: frame.name)
    return owners
from_jsonl_file(path: Path | str) -> FrameNetSearch classmethod

Load index from JSON Lines file.

PARAMETER DESCRIPTION
path

Path to JSON Lines file containing frames.

TYPE: Path | str

RETURNS DESCRIPTION
FrameNetSearch

Index populated with frames from file.

RAISES DESCRIPTION
FileNotFoundError

If file does not exist.

ValueError

If file contains invalid data.

Source code in src/glazing/framenet/search.py
@classmethod
def from_jsonl_file(cls, path: Path | str) -> FrameNetSearch:
    """Build an index from a JSON Lines file holding one frame per line.

    Lines that are empty after stripping whitespace are skipped; every
    other line is parsed with ``Frame.model_validate_json``.

    Parameters
    ----------
    path : Path | str
        Path to JSON Lines file containing frames.

    Returns
    -------
    FrameNetSearch
        Index populated with frames from file.

    Raises
    ------
    FileNotFoundError
        If file does not exist.
    ValueError
        If file contains invalid data (presumably raised by
        ``Frame.model_validate_json`` - confirm against the model), or if
        two frames in the file share an ID.
    """
    source = Path(path)
    if not source.exists():
        msg = f"File not found: {source}"
        raise FileNotFoundError(msg)

    with source.open(encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        frames = [Frame.model_validate_json(text) for text in stripped_lines if text]

    return cls(frames)
get_all_fe_names() -> list[str]

Get all unique frame element names across all frames.

RETURNS DESCRIPTION
list[str]

Sorted list of unique FE names.

Source code in src/glazing/framenet/search.py
def get_all_fe_names(self) -> list[str]:
    """List every frame element name known to the index.

    Returns
    -------
    list[str]
        Sorted list of unique FE names.
    """
    # Iterating the index yields its keys, i.e. the FE names.
    names = list(self._fe_index)
    names.sort()
    return names
get_all_lemmas() -> list[str]

Get all unique lemmas across all lexical units.

RETURNS DESCRIPTION
list[str]

Sorted list of unique lemmas.

Source code in src/glazing/framenet/search.py
def get_all_lemmas(self) -> list[str]:
    """List every lemma known to the index.

    Returns
    -------
    list[str]
        Sorted list of unique lemmas.
    """
    # Iterating the index yields its keys, i.e. the lemmas.
    lemmas = list(self._frames_by_lemma)
    lemmas.sort()
    return lemmas
get_fe_across_frames(fe_name: str) -> dict[str, FrameElement]

Get frame element definitions across all frames.

PARAMETER DESCRIPTION
fe_name

Name of frame element to retrieve.

TYPE: str

RETURNS DESCRIPTION
dict[str, FrameElement]

Mapping from frame name to FE definition in that frame.

Source code in src/glazing/framenet/search.py
def get_fe_across_frames(self, fe_name: str) -> dict[str, FrameElement]:
    """Collect one frame element's definition from every frame that has it.

    Parameters
    ----------
    fe_name : str
        Name of frame element to retrieve.

    Returns
    -------
    dict[str, FrameElement]
        Mapping from frame name to FE definition in that frame, with keys
        in sorted order.
    """
    by_frame_name = {}
    for fid in self._fe_index.get(fe_name, set()):
        owner = self._frames_by_id[fid]
        element = owner.get_fe_by_name(fe_name)
        if element:
            by_frame_name[owner.name] = element

    # Rebuild the dict so that iteration follows sorted frame names.
    return {name: by_frame_name[name] for name in sorted(by_frame_name)}
get_frame_by_id(frame_id: FrameID) -> Frame | None

Get frame by ID.

PARAMETER DESCRIPTION
frame_id

Frame ID to look up.

TYPE: FrameID

RETURNS DESCRIPTION
Frame | None

Frame if found, None otherwise.

Source code in src/glazing/framenet/search.py
def get_frame_by_id(self, frame_id: FrameID) -> Frame | None:
    """Look up a single frame by its ID.

    Parameters
    ----------
    frame_id : FrameID
        Frame ID to look up.

    Returns
    -------
    Frame | None
        The frame registered under that ID, or None when there is none.
    """
    frames = self._frames_by_id
    return frames.get(frame_id, None)
get_frame_by_name(name: str) -> Frame | None

Get frame by name.

PARAMETER DESCRIPTION
name

Frame name to look up.

TYPE: str

RETURNS DESCRIPTION
Frame | None

Frame if found, None otherwise.

Source code in src/glazing/framenet/search.py
def get_frame_by_name(self, name: str) -> Frame | None:
    """Look up a single frame by its exact name.

    Parameters
    ----------
    name : str
        Frame name to look up.

    Returns
    -------
    Frame | None
        The frame registered under that name, or None when there is none.
    """
    frames = self._frames_by_name
    return frames.get(name, None)
get_statistics() -> dict[str, int]

Get index statistics.

RETURNS DESCRIPTION
dict[str, int]

Statistics about indexed data.

Source code in src/glazing/framenet/search.py
def get_statistics(self) -> dict[str, int]:
    """Summarise how much data the index currently holds.

    Returns
    -------
    dict[str, int]
        Counts of frames, unique FE names, FEs summed over all frames,
        unique lemmas, and LUs summed over all frames.
    """
    fe_total = 0
    lu_total = 0
    # One pass over the frames accumulates both per-frame totals.
    for frame in self._frames_by_id.values():
        fe_total += len(frame.frame_elements)
        lu_total += len(frame.lexical_units)

    stats = {}
    stats["frame_count"] = len(self._frames_by_id)
    stats["unique_fe_names"] = len(self._fe_index)
    stats["total_fes"] = fe_total
    stats["unique_lemmas"] = len(self._frames_by_lemma)
    stats["total_lus"] = lu_total
    return stats
merge(other: FrameNetSearch) -> None

Merge another index into this one.

PARAMETER DESCRIPTION
other

Index to merge into this one.

TYPE: FrameNetSearch

RAISES DESCRIPTION
ValueError

If there are conflicting frame IDs.

Source code in src/glazing/framenet/search.py
def merge(self, other: FrameNetSearch) -> None:
    """Merge another index into this one.

    Every frame ID in ``other`` is checked against this index before any
    frame is added, so a detected ID conflict leaves this index unchanged
    instead of half-merged.

    Parameters
    ----------
    other : FrameNetSearch
        Index to merge into this one.

    Raises
    ------
    ValueError
        If there are conflicting frame IDs.
    """
    # Validate first: reject the whole merge on the first shared ID.
    for frame_id in other._frames_by_id:
        if frame_id in self._frames_by_id:
            msg = f"Cannot merge: frame ID {frame_id} exists in both indices"
            raise ValueError(msg)

    # No conflicts: add_frame rebuilds the secondary indexes for each frame.
    for frame in other._frames_by_id.values():
        self.add_frame(frame)
search_frames_by_definition(pattern: str, case_sensitive: bool = False) -> list[Frame]

Search frames by definition pattern.

PARAMETER DESCRIPTION
pattern

Regular expression pattern to match against frame definitions.

TYPE: str

case_sensitive

Whether search should be case-sensitive.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Frame]

Frames with definitions matching pattern.

RAISES DESCRIPTION
re.error

If pattern is invalid regular expression.

Source code in src/glazing/framenet/search.py
def search_frames_by_definition(
    self, pattern: str, case_sensitive: bool = False
) -> list[Frame]:
    """Find frames whose definition text matches a regular expression.

    The pattern is searched for anywhere in the plain text of each frame's
    definition.

    Parameters
    ----------
    pattern : str
        Regular expression pattern to match against frame definitions.
    case_sensitive : bool
        Whether search should be case-sensitive.

    Returns
    -------
    list[Frame]
        Frames with definitions matching pattern, sorted by frame name.

    Raises
    ------
    re.error
        If pattern is invalid regular expression.
    """
    regex = re.compile(pattern, 0 if case_sensitive else re.IGNORECASE)

    hits = [
        frame
        for frame in self._frames_by_id.values()
        if regex.search(frame.definition.plain_text)
    ]
    hits.sort(key=lambda frame: frame.name)
    return hits
search_frames_by_name(pattern: str, case_sensitive: bool = False) -> list[Frame]

Search frames by name pattern.

PARAMETER DESCRIPTION
pattern

Regular expression pattern to match against frame names.

TYPE: str

case_sensitive

Whether search should be case-sensitive.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Frame]

Frames with names matching pattern.

RAISES DESCRIPTION
re.error

If pattern is invalid regular expression.

Source code in src/glazing/framenet/search.py
def search_frames_by_name(self, pattern: str, case_sensitive: bool = False) -> list[Frame]:
    """Find frames whose name matches a regular expression.

    The pattern is searched for anywhere in each name under which a frame
    is registered.

    Parameters
    ----------
    pattern : str
        Regular expression pattern to match against frame names.
    case_sensitive : bool
        Whether search should be case-sensitive.

    Returns
    -------
    list[Frame]
        Frames with names matching pattern, sorted by frame name.

    Raises
    ------
    re.error
        If pattern is invalid regular expression.
    """
    regex = re.compile(pattern, 0 if case_sensitive else re.IGNORECASE)

    hits = [
        frame
        for registered_name, frame in self._frames_by_name.items()
        if regex.search(registered_name)
    ]
    hits.sort(key=lambda frame: frame.name)
    return hits
search_lexical_units(pattern: str, pos: FrameNetPOS | None = None, case_sensitive: bool = False) -> list[LexicalUnit]

Search lexical units by pattern.

PARAMETER DESCRIPTION
pattern

Regular expression pattern to match against LU names.

TYPE: str

pos

If specified, only return LUs with this POS.

TYPE: FrameNetPOS | None DEFAULT: None

case_sensitive

Whether search should be case-sensitive.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[LexicalUnit]

Lexical units matching the pattern.

RAISES DESCRIPTION
re.error

If pattern is invalid regular expression.

Source code in src/glazing/framenet/search.py
def search_lexical_units(
    self, pattern: str, pos: FrameNetPOS | None = None, case_sensitive: bool = False
) -> list[LexicalUnit]:
    """Search lexical units by pattern.

    Each indexed frame's lexical units are visited once. A unit is
    returned at most once per LU ID, and only units whose name contains a
    dot are considered, matching what ``add_frame`` puts in the LU index.

    Parameters
    ----------
    pattern : str
        Regular expression pattern to match against LU names.
    pos : FrameNetPOS | None
        If specified, only return LUs with this POS.
    case_sensitive : bool
        Whether search should be case-sensitive.

    Returns
    -------
    list[LexicalUnit]
        Lexical units matching the pattern, sorted by name and then by LU
        ID so that same-named units come back in a stable order.

    Raises
    ------
    re.error
        If pattern is invalid regular expression.
    """
    flags = 0 if case_sensitive else re.IGNORECASE
    regex = re.compile(pattern, flags)

    # Keyed by LU ID so that a unit is reported only once. Walking the
    # frames directly avoids rescanning a frame's LU list for every
    # (frame, LU) reference held in the LU index.
    matches: dict[int, LexicalUnit] = {}
    for frame in self._frames_by_id.values():
        for lu in frame.lexical_units:
            if "." not in lu.name or lu.id in matches:
                continue
            if (pos is None or lu.pos == pos) and regex.search(lu.name):
                matches[lu.id] = lu

    return sorted(matches.values(), key=lambda lu: (lu.name, lu.id))

Functions