Module gatenlp.annotation

Module for the Annotation class, which represents information about a span of text in a document.

Expand source code
"""
Module for Annotation class which represents information about a span of text in  a document.
"""
import copy as lib_copy
from functools import total_ordering
from gatenlp.features import Features
from gatenlp.offsetmapper import OFFSET_TYPE_JAVA, OFFSET_TYPE_PYTHON
from gatenlp.utils import allowspan, support_annotation_or_set
from gatenlp.span import Span


@total_ordering
class Annotation:   # pylint: disable=R0904
    """
    An annotation represents information about a span of text. It contains the start and end
    offsets of the span, an "annotation type" and an arbitrary number of features.

    In addition it contains an id which has no meaning for the annotation itself but is
    used to uniquely identify an annotation within the set it is contained in.

    All fields except the features are immutable, once the annotation has been created
    only the features can be changed.
    """

    @allowspan
    def __init__(self, start: int, end: int, anntype: str, features=None, annid: int = 0):
        """
        Create a new annotation instance. Once an annotation has been created,
        the start, end, type and id fields are not meant to be changed.

        NOTE: this should almost never be done directly
        and instead the method AnnotationSet.add should be used.

        Args:
            start: start offset of the annotation
            end: end offset of the annotation, must not be smaller than `start`
            anntype: annotation type
            features: an initial collection of features, None for no features.
                Must not be an int (guards against passing the id positionally).
            annid: the id of the annotation, must be an int

        Raises:
            Exception: if `end < start`, if `annid` is not an int, or if `features` is an int.
        """
        # Both halves of each message must be f-strings, otherwise the id and
        # features placeholders are emitted literally instead of being filled in.
        if end < start:
            raise Exception(
                f"Cannot create annotation start={start}, end={end}, type={anntype}, "
                f"id={annid}, features={features}: start > end"
            )
        if not isinstance(annid, int):
            raise Exception(
                f"Cannot create annotation start={start}, end={end}, type={anntype}, "
                f"id={annid}, features={features}: annid is not an int"
            )
        if isinstance(features, int):
            raise Exception(
                f"Cannot create annotation start={start}, end={end}, type={anntype}, "
                f"id={annid}, features={features}: features must not be an int, mixed up with annid?"
            )
        # A freshly created annotation does not belong to any set yet.
        self._owner_set = None
        # Feature changes are reported back to this annotation so they can be logged.
        self._features = Features(features, _change_logger=self._log_feature_change)
        self._type = anntype
        self._start = start
        self._end = end
        self._id = annid

    @property
    def type(self) -> str:
        """
        Return the annotation type given when the annotation was created.
        """
        return self._type

    @property
    def start(self) -> int:
        """
        Return the start offset of the annotation.
        """
        return self._start

    @property
    def end(self) -> int:
        """
        Return the end offset of the annotation.
        """
        return self._end

    @property
    def features(self) -> Features:
        """
        Return the Features object holding the features of this annotation.
        This is the object created in the constructor, not a copy.
        """
        return self._features

    @property
    def id(self) -> int:  # pylint: disable=C0103
        """
        Return the annotation id given when the annotation was created.
        """
        return self._id

    @property
    def span(self) -> Span:
        """
        Return a new Span instance built from the start and end offsets of this annotation.
        """
        return Span(self._start, self._end)

    def _changelog(self):
        """
        Return the changelog of the owning annotation set, or None if this
        annotation has no owning set.
        """
        owner = self._owner_set
        return None if owner is None else owner.changelog

    # TODO: for now at least, make sure only simple JSON-serialisable things are used as
    # features! We do NOT allow any user-specific types, so that whatever we create stays
    # interchangeable with GATE.
    # In addition we do NOT allow None features.
    # So a feature name always has to be a string (not None), and the value has to be
    # something that is JSON-serialisable
    # (except maps with None keys).
    # For performance reasons we check the feature name but not the value (maybe make
    # value checking optional:
    # on by default, but possible to switch off?)
    def _log_feature_change(self, command: str, feature: str = None, value=None) -> None:
        """
        Record a feature change in the changelog of the owning set, if there is one.

        Does nothing when the annotation has no owning set or the owning set has
        no changelog.

        Args:
          command: name of the change command; it is logged with the prefix "ann-"
          feature: the feature name, only added to the log entry if not None
          value: the feature value, only added to the log entry if not None

        Returns:
          None
        """
        log = self._changelog()
        if log is None:
            return
        entry = {
            "command": "ann-" + command,
            "type": "annotation",
            "set": self._owner_set.name,
            "id": self.id,
        }
        # Optional parts are only recorded when they were actually given.
        for key, item in (("feature", feature), ("value", value)):
            if item is not None:
                entry[key] = item
        log.append(entry)

    def equal(self, other):
        """
        Compare the content of this annotation with other. This returns True if all public
        attributes of the annotations (type, start, end and features) are equal; the
        annotation id is ignored.

        Args:
            other: another object

        Returns:
            True if other is an Annotation with equal type, offsets and features,
            False otherwise (including when other is not an Annotation).
        """
        if not isinstance(other, Annotation):
            return False
        # The type is a public attribute too: annotations of different types over
        # the same span must not be reported as equal.
        return (
            self.type == other.type
            and self.start == other.start
            and self.end == other.end
            and self.features == other.features
        )

    def same(self, other):
        """
        Compare the content of this annotation with other. This returns True if all public
        attributes of the annotations (id, type, start, end and features) are equal.

        Args:
            other: another object

        Returns:
            True if other is an Annotation with equal id, type, offsets and features,
            False otherwise (including when other is not an Annotation).
        """
        if not isinstance(other, Annotation):
            return False
        # The type is a public attribute too: annotations of different types must
        # not be reported as the same, even if id, span and features agree.
        return (
            self.id == other.id
            and self.type == other.type
            and self.start == other.start
            and self.end == other.end
            and self.features == other.features
        )

    def __lt__(self, other) -> bool:
        """
        Ordering used for sorting: annotations are ordered by increasing start offset
        and, for equal start offsets, by increasing annotation id.

        Annotation ids are unique within a set, so annotations that come from a single
        annotation set get a unique order. For other collections of annotations, the
        order between annotations with the same start offset and the same id is undefined.

        IMPORTANT: this relation only looks at the start offset and the id and is meant
        for easy sorting of annotations from a single set only. It says nothing about
        the remaining content of the annotations: for two different annotations A and B
        it is possible that neither A < B nor B < A holds.

        Note: this is defined to match the default order of the default iterator of an
        AnnotationSet. The default order of Span is different, so sorting annotations
        directly and sorting them by their Span can result in different orderings.

        Note: for now the other object has to be an instance of Annotation, duck typing
        is not supported!

        Raises:
            Exception: if other is not an Annotation.
        """
        if not isinstance(other, Annotation):
            raise Exception("Cannot compare to non-Annotation")
        # The start offset decides unless it is tied, then the id breaks the tie.
        if self.start != other.start:
            return self.start < other.start
        return self.id < other.id

    def __repr__(self) -> str:
        """
        Return a string showing start, end, type, features and id of the annotation.
        """
        template = "Annotation({},{},{},features={},id={})"
        parts = (self.start, self.end, self.type, self._features, self.id)
        return template.format(*parts)

    @property
    def length(self) -> int:
        """
        Return the length of the annotation, i.e. the length of its offset span.
        Since the end offset is one after the last element, this is end - start.

        Note: this is deliberately not implemented as len(ann): len(annset) returns the
        number of annotations in a set, while the span length of an annotation set is
        available under the name `length`, so the name is identical between annotations
        and annotation sets.
        """
        return self.end - self.start

    @support_annotation_or_set
    def isoverlapping(self, start: int, end: int) -> bool:
        """
        Checks if this annotation is overlapping with the given span, annotation or
        annotation set, i.e. if they have at least one offset in common.

        A zero-length span overlaps if its offset is covered by this annotation
        (see `iscovering`); a zero-length annotation overlaps a non-empty span if its
        offset lies inside the span.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if overlapping, `False` otherwise

        """
        if start == end:
            # Zero-length span: overlapping means its offset is covered by this annotation.
            return self.iscovering(start)
        if self.start == self.end:
            # Zero-length annotation: overlapping means its offset lies inside the span.
            return start <= self.start < end
        # General interval test. Checking only whether the first or last offset of the
        # span is covered would miss a span that strictly contains this annotation.
        return self.start < end and start < self.end

    @support_annotation_or_set
    def isleftoverlapping(self, start: int, end: int) -> bool:
        """
        Checks if this annotation is overlapping with the given span, annotation or
        annotation set on the left, i.e. this annotation starts at or before the start
        of the span, ends at or before the end of the span, and its last character is
        inside the span.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if left-overlapping, `False` otherwise

        """
        # `start < self.end` makes sure there actually is an overlap: without it an
        # annotation lying completely before the span would be reported as well.
        return self.start <= start < self.end <= end

    @support_annotation_or_set
    def isrightoverlapping(self, start: int, end: int) -> bool:
        """
        Checks if this annotation is overlapping with the given span, annotation or
        annotation set on the right, i.e. the first character of this annotation is
        inside the span and this annotation ends at or after the end of the span.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if right-overlapping, `False` otherwise

        """
        # `self.start < end` makes sure there actually is an overlap: without it an
        # annotation lying completely after the span would be reported as well.
        return start <= self.start < end <= self.end

    @support_annotation_or_set
    def iscoextensive(self, start: int, end: int) -> bool:
        """
        Checks if this annotation has exactly the same start and end offsets as the
        given span, annotation or annotation set.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if coextensive, `False` otherwise

        """
        same_start = self.start == start
        return same_start and self.end == end

    @support_annotation_or_set
    def iswithin(self, start: int, end: int) -> bool:
        """
        Checks if this annotation lies within the given span, annotation or
        annotation set, i.e. this annotation starts at or after the given start
        and ends at or before the given end.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if within, `False` otherwise

        """
        if start > self.start:
            return False
        return end >= self.end

    @support_annotation_or_set
    def isbefore(self, start: int, end: int, immediately=False) -> bool:
        """
        Checks if this annotation is before the other span, i.e. this annotation ends
        at or before the start offset of the other annotation or span.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
            start: start offset of the span
            end: end offset of the span (not used for the check)
            immediately: if true, checks if this annotation ends exactly where the
                other one starts (Default value = False)

        Returns:
          True if before, False otherwise

        """
        return self.end == start if immediately else self.end <= start

    @support_annotation_or_set
    def isafter(self, start: int, end: int, immediately=False) -> bool:
        """
        Checks if this annotation is after the other span, i.e. this annotation starts
        at or after the end offset of the other annotation or span.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span (not used for the check)
          end: end offset of the span
          immediately: if true, checks if this annotation starts exactly where the
              other one ends (Default value = False)

        Returns:
          True if after, False otherwise

        """
        return self.start == end if immediately else self.start >= end

    @support_annotation_or_set
    def isstartingat(self, start: int, end_: int) -> bool:
        """
        Return True iff the annotation starts at the given start offset.

        Args:
            start: start offset
            end_: not used for the check; only present so the method can be used with
                anything that can be interpreted as a span (AnnotationSet, Span)

        Returns:
            True if the start offset of this annotation equals `start`
        """
        return self._start == start

    @support_annotation_or_set
    def isendingwith(self, start: int, end: int) -> bool:
        """
        Checks if this annotation is ending at the same offset as the given span or annotation.

        Args:
            start: start of the span (ignored)
            end: end of the span

        Returns:
            True if the end offset of this annotation equals `end`

        """
        return self._end == end

    @support_annotation_or_set
    def gap(self, start: int, end: int) -> int:
        """
        Return the gap between this annotation and the other annotation or span.
        This is the distance between the end of whichever of the two comes first
        and the start of the one that comes second, so it does not depend on which
        of the two this method is called on.

        This is negative if the annotations overlap.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
            (see gatenlp._utils.support_annotation_or_set)

        Args:
            start: start offset of span
            end: end offset of span

        Returns:
          size of gap

        """
        if self.start < start:
            # This annotation comes first, the given span second.
            first_end, second_start = self.end, start
        else:
            # The given span comes first (or both start at the same offset).
            first_end, second_start = end, self.start
        return second_start - first_end

    @support_annotation_or_set
    def iscovering(self, start: int, end=None) -> bool:
        """
        Checks if this annotation is covering the given span, annotation or
        annotation set, i.e. the given start offset is at or after the start of this
        annotation and the given end offset is at or before the end of this annotation.

        If end is not given, the method checks if start is the offset of a character
        contained in this annotation; for a zero-length annotation it checks if start
        is equal to the offset of the annotation.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span, or None to check a single offset

        Returns:
          True if covering, False otherwise

        """
        if end is not None:
            # A full span was given: both of its ends have to lie inside this annotation.
            return self.start <= start and self.end >= end
        if self.end == self.start:
            # A zero-length annotation only covers its own offset.
            return self.start == start
        return self.start <= start < self.end

    def to_dict(self, offset_mapper=None, offset_type=None, **kwargs):
        """
        Return a representation of this annotation as a nested map. This representation is
        used for several serialization methods.

        Args:
            offset_mapper: the offset mapper to use, must be specified if
                `offset_type` is specified.
            offset_type: the offset type to be used for the conversion, must be specified if
                `offset_mapper` is specified
            kwargs: ignored

        Returns:
            the dictionary representation of the Annotation, with the keys
            "type", "start", "end", "id" and "features"

        Raises:
            Exception: if only one of `offset_mapper` and `offset_type` is given, or if
                `offset_type` is not one of the known offset types.
        """
        # Either both or none of the two have to be given.
        if bool(offset_mapper) != bool(offset_type):
            raise Exception(
                "offset_mapper and offset_type must be specified both or none"
            )
        start, end = self._start, self._end
        if offset_mapper is not None:
            # Pick the conversion matching the requested offset type, then map both offsets.
            if offset_type == OFFSET_TYPE_JAVA:
                convert = offset_mapper.convert_to_java
            elif offset_type == OFFSET_TYPE_PYTHON:
                convert = offset_mapper.convert_to_python
            else:
                raise Exception(
                    f"Not a valid offset type: {offset_type}, must be 'p' or 'j'"
                )
            start = convert(start)
            end = convert(end)
        return {
            "type": self.type,
            "start": start,
            "end": end,
            "id": self.id,
            "features": self._features.to_dict(),
        }

    @staticmethod
    def from_dict(dictrepr, owner_set=None, **kwargs):
        """
        Construct an annotation object from the dictionary representation.

        The values for the keys "start", "end", "type", "id" and "features" are read
        from `dictrepr` and passed to the Annotation constructor; a missing key is
        passed on as None.

        Args:
          dictrepr: dictionary representation
          owner_set: the owning set the annotation should have (Default value = None)
          kwargs: ignored

        Returns:
          the new Annotation, with its owning set set to `owner_set`
        """
        lookup = dictrepr.get
        start = lookup("start")
        end = lookup("end")
        anntype = lookup("type")
        annid = lookup("id")
        features = lookup("features")
        ann = Annotation(start, end, anntype, annid=annid, features=features)
        ann._owner_set = owner_set
        return ann

    def __copy__(self):
        """
        Create a new Annotation with the same offsets, type and id, passing the
        Features object of this annotation to the constructor. The new annotation
        has no owning set.
        """
        duplicate = Annotation(
            self._start, self._end, self._type, annid=self._id, features=self._features
        )
        return duplicate

    def copy(self):
        """
        Return a shallow copy of the annotation (features are shared).
        This is a convenience method that delegates to `__copy__`.
        """
        return self.__copy__()

    def __deepcopy__(self, memo=None):
        """
        Create a new Annotation with the same offsets, type and id and with a deep copy
        of the dictionary representation of the features. The new annotation has no
        owning set.

        Args:
            memo: the memo dictionary, passed on to the deep copy of the features
        """
        feats = self._features
        fts = None if feats is None else lib_copy.deepcopy(feats.to_dict(), memo=memo)
        return Annotation(
            self._start, self._end, self._type, annid=self._id, features=fts
        )

    def deepcopy(self, memo=None):
        """
        Return a deep copy of the annotation (features and their values are copied as well).

        Args:
            memo: the memo dictionary passed on to the deepcopy function of the copy module
        """
        return lib_copy.deepcopy(self, memo=memo)

    def _update_offsets(self, start, end):
        """
        Replace the start and end offset of an existing annotation. THIS IS FOR INTERNAL USE ONLY!
        This method should never be used by library users as it can cause many different,
        unexpected and hard to debug problems. It is here for very limited use by
        library-internal methods only!

        No validation is done on the new offsets.

        Args:
            start: new start offset
            end: new end offset
        """
        self._start, self._end = start, end

Classes

class Annotation (start: int, end: int, anntype: str, features=None, annid: int = 0)

An annotation represents information about a span of text. It contains the start and end offsets of the span, an "annotation type" and an arbitrary number of features.

In addition it contains an id which has no meaning for the annotation itself but is used to uniquely identify an annotation within the set it is contained in.

All fields except the features are immutable: once the annotation has been created, only the features can be changed.

This constructor creates a new annotation instance. Once an annotation has been created, the start, end, type and id fields cannot be changed.

NOTE: this should almost never be done directly and instead the method AnnotationSet.add should be used.

Args

start
start offset of the annotation
end
end offset of the annotation
anntype
annotation type
features
an initial collection of features, None for no features.
annid
the id of the annotation
Expand source code
class Annotation:   # pylint: disable=R0904
    """
    An annotation represents information about a span of text. It contains the start and end
    offsets of the span, an "annotation type" and an arbitrary number of features.

    In addition it contains an id which has no meaning for the annotation itself but is
    used to uniquely identify an annotation within the set it is contained in.

    All fields except the features are immutable, once the annotation has been created
    only the features can be changed.
    """

    @allowspan
    def __init__(self, start: int, end: int, anntype: str, features=None, annid: int = 0):
        """
        This constructor creates a new annotation instance. Once an annotation has been created,
        the start, end, type and id fields cannot be changed.

        NOTE: this should almost never be done directly
        and instead the method AnnotationSet.add should be used.

        Args:
            start: start offset of the annotation
            end: end offset of the annotation
            anntype: annotation type
            features: an initial collection of features, None for no features.
            annid: the id of the annotation
        """
        if end < start:
            raise Exception(
                f"Cannot create annotation start={start}, end={end}, type={anntype}, "
                "id={annid}, features={features}: start > end"
            )
        if not isinstance(annid, int):
            raise Exception(
                f"Cannot create annotation start={start}, end={end}, type={anntype}, "
                "id={annid}, features={features}: annid is not an int"
            )
        if isinstance(features, int):
            raise Exception(
                f"Cannot create annotation start={start}, end={end}, type={anntype}, "
                "id={annid}, features={features}: features must not be an int, mixed up with annid?"
            )
        self._owner_set = None
        self._features = Features(features, _change_logger=self._log_feature_change)
        self._type = anntype
        self._start = start
        self._end = end
        self._id = annid

    @property
    def type(self) -> str:
        """
        Returns the annotation type.
        """
        return self._type

    @property
    def start(self) -> int:
        """
        Returns the start offset.
        """
        return self._start

    @property
    def end(self):
        """
        Returns the end offset.
        """
        return self._end

    @property
    def features(self):
        """
        Returns the features for the annotation.
        """
        return self._features

    @property
    def id(self):  # pylint: disable=C0103
        """
        Returns the annotation id.
        """
        return self._id

    @property
    def span(self) -> Span:
        """
        Returns a Span instance for this annotation.
        """
        return Span(self._start, self._end)

    def _changelog(self):
        if self._owner_set is not None:
            return self._owner_set.changelog
        return None

    # TODO: for now at least, make sure only simple JSON serialisable things are used! We do NOT
    # allow any user specific types in order to make sure what we create is interchangeable
    # with GATE.
    # In addition we do NOT allow None features.
    # So a feature name always has to be a string (not None), the value has to be anything
    # that is json
    # serialisable (except None keys for maps).
    # For performance reasons we check the feature name but not the value (maybe make checking
    # optional
    # on by default but still optional?)
    def _log_feature_change(self, command: str, feature: str = None, value=None) -> None:
        """

        Args:
          command: str:
          feature: str:  (Default value = None)
          value:  (Default value = None)

        Returns:

        """
        if self._changelog() is None:
            return
        command = "ann-" + command
        change = {
            "command": command,
            "type": "annotation",
            "set": self._owner_set.name,
            "id": self.id,
        }
        if feature is not None:
            change["feature"] = feature
        if value is not None:
            change["value"] = value
        self._changelog().append(change)

    def equal(self, other):
        """
        Compare the content of this annotation with other. This returns True if all public attributes
        of the annotations excluding the annotation id (which is ignored) are equal.

        Args:
            other: another object
        """
        return isinstance(other, Annotation) and \
            self.start == other.start and \
            self.end == other.end and \
            self.features == other.features

    def same(self, other):
        """
        Compare the content of this annotation with other. This returns True if all public attributes
        of the annotations including the annotation id are equal.

        Args:
            other: another object
        """
        return isinstance(other, Annotation) and \
            self.id == other.id and \
            self.start == other.start and \
            self.end == other.end and \
            self.features == other.features

    def __lt__(self, other) -> bool:
        """
        Comparison for sorting: this sorts by increasing start offset,
        then increasing annotation id.
        Since annotation ids within a set are unique, this guarantees a unique order of
        annotations that come from an annotation set. For other collections of annotations,
        the order between those with same start offset and same id is undefined.

        IMPORTANT: this is not consistent with our definition of equality and meant for
        easy sorting of annotations from a single set only! Equality is based on the actual
        content of all the fields in an annotation, including the id, while the less than
        relation is only based on start offset and id. It is therefore possible for two
        annotations A1, A2 that A not < B, B not < A and A != B.

        Note: this is defined to match the default order of the default iterator of an
        AnnotationSet. The default order of Span is different, so ordering sorting annotations
        directly and sorting them by their Span can result in different orderings.

        Note: for now the other object has to be an instance of Annotation, duck typing is
        not supported!
        """
        if not isinstance(other, Annotation):
            raise Exception("Cannot compare to non-Annotation")
        if self.start < other.start:
            return True
        elif self.start > other.start:
            return False
        else:
            return self.id < other.id

    def __repr__(self) -> str:
        """
        String representation of the annotation.
        """
        return "Annotation({},{},{},features={},id={})".format(
            self.start, self.end, self.type, self._features, self.id
        )

    @property
    def length(self) -> int:
        """
        Returns the length of the annotation: this is the length of the offset span.
        Since the end offset is one after the last
        element, we return end-start. Note: this is deliberately not implemented as len(ann), as
        len(annset) returns the number of annotations in the set but annset.length()
        also returns the
        span length of the annotation set, so the method name for this is identical between
        annotations
        and annotation sets.
        """
        return self.end - self.start

    @support_annotation_or_set
    def isoverlapping(self, start: int, end: int) -> bool:
        """
        Checks if this annotation is overlapping with the given span, annotation or
        annotation set.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if overlapping, `False` otherwise

        """
        if start == end:
            return self.iscovering(start)
        else:
            return self.iscovering(start) or self.iscovering(end - 1)

    @support_annotation_or_set
    def isleftoverlapping(self, start: int, end: int) -> bool:
        """
        Checks if this annotation is overlapping with the given span, annotation or
        annotation set on the left, i.e. the last character is inside the span and the
        first character is before the span.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if left-overlapping, `False` otherwise

        """
        return self.start <= start and self.end <= end

    @support_annotation_or_set
    def isrightoverlapping(self, start: int, end: int) -> bool:
        """
        Checks if this annotation is overlapping with the given span, annotation or
        annotation set on the right, i.e. the first character is inside the span.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if right-overlapping, `False` otherwise

        """
        return self.start >= start and self.end >= end

    @support_annotation_or_set
    def iscoextensive(self, start: int, end: int) -> bool:
        """
        Checks if this annotation is coextensive with the given span, annotation or
        annotation set, i.e. has exactly the same start and end offsets.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if coextensive, `False` otherwise

        """
        return self.start == start and self.end == end

    @support_annotation_or_set
    def iswithin(self, start: int, end: int) -> bool:
        """
        Checks if this annotation is within the given span, annotation or
        annotation set, i.e. both the start and end offsets of this annotation
        are after the given start and before the given end.

        Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span

        Returns:
          `True` if within, `False` otherwise

        """
        return start <= self.start and end >= self.end

    @support_annotation_or_set
    def isbefore(self, start: int, end: int, immediately=False) -> bool:
        """
        Tell whether this annotation comes before the other span, i.e. this annotation
        ends at or before the start offset of the other annotation or span.

        Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
            start: start offset of the span
            end: end offset of the span (not used by the check)
            immediately: if true, require that this annotation ends exactly where the
                other one starts (Default value = False)

        Returns:
          True if before, False otherwise

        """
        return (self.end == start) if immediately else (self.end <= start)

    @support_annotation_or_set
    def isafter(self, start: int, end: int, immediately=False) -> bool:
        """
        Tell whether this annotation comes after the other span, i.e. this annotation
        starts at or after the end offset of the other annotation or span.

        Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span (not used by the check)
          end: end offset of the span
          immediately: if true, require that this annotation starts exactly where the
              other one ends (Default value = False)

        Returns:
          True if after, False otherwise

        """
        return (self.start == end) if immediately else (self.start >= end)

    @support_annotation_or_set
    def isstartingat(self, start: int, end_: int) -> bool:
        """
        Tell whether the annotation begins exactly at the given start offset.

        Args:
            start: start offset to compare with
            end_: unused; only present so the method accepts anything that can be
                interpreted as a span (AnnotationSet, Span)

        Returns:
            True iff the start offset of this annotation equals `start`
        """
        return start == self._start

    @support_annotation_or_set
    def isendingwith(self, start: int, end: int) -> bool:
        """
        Tell whether this annotation ends at the same offset as the given span or annotation.

        Args:
            start: start of the span (ignored)
            end: end of the span

        Returns:
            True if the end offset of this annotation equals `end`

        """
        return end == self._end

    @support_annotation_or_set
    def gap(self, start: int, end: int) -> int:
        """
        Return the gap between this annotation and the other annotation or span.

        The two are put in order of their start offsets and the result is the start
        offset of the later one minus the end offset of the earlier one, so it does not
        depend on which of the two this method is called on.

        This is negative if the annotations overlap.

        Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
            (see gatenlp._utils.support_annotation_or_set)

        Args:
            start: start offset of span
            end: end offset of span

        Returns:
          size of gap

        """
        if self.start < start:
            # this annotation starts first: measure from its end to the span's start
            return start - self.end
        # the span starts first (or both start at the same offset)
        return self.start - end

    @support_annotation_or_set
    def iscovering(self, start: int, end=None) -> bool:
        """
        Tell whether this annotation covers the given span, annotation or annotation
        set, i.e. the given start is not before the start of this annotation and the
        given end is not after the end of this annotation.

        If end is not given, the method checks whether `start` is the offset of a
        character contained in this annotation; for a zero-length annotation only its
        own offset is regarded as covered.

        Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
          (see gatenlp._utils.support_annotation_or_set)

        Args:
          start: start offset of the span
          end: end offset of the span, or None to test the single offset `start`

        Returns:
          True if covering, False otherwise

        """
        if end is not None:
            return self.start <= start and self.end >= end
        if self.start == self.end:
            # zero-length annotation: it has no characters, so compare the position itself
            return start == self.start
        return self.start <= start < self.end

    def to_dict(self, offset_mapper=None, offset_type=None, **kwargs):
        """
        Build the nested-map representation of this annotation that is used by several
        serialization methods.

        Args:
            offset_mapper: the offset mapper to use, must be specified if
                `offset_type` is specified.
            offset_type: the offset type to be used for the conversion, must be specified if
                `offset_mapper` is specified
            kwargs: accepted but not used by this method

        Returns:
            the dictionary representation of the Annotation with the keys
            "type", "start", "end", "id" and "features"

        Raises:
            Exception: if only one of `offset_mapper` and `offset_type` is given, or if
                `offset_type` is neither the Java nor the Python offset type
        """
        # the two arguments only make sense together
        if bool(offset_mapper) != bool(offset_type):
            raise Exception(
                "offset_mapper and offset_type must be specified both or none"
            )
        if offset_mapper is None:
            # no conversion requested: use the stored offsets unchanged
            start, end = self._start, self._end
        elif offset_type == OFFSET_TYPE_JAVA:
            start = offset_mapper.convert_to_java(self._start)
            end = offset_mapper.convert_to_java(self._end)
        elif offset_type == OFFSET_TYPE_PYTHON:
            start = offset_mapper.convert_to_python(self._start)
            end = offset_mapper.convert_to_python(self._end)
        else:
            raise Exception(
                f"Not a valid offset type: {offset_type}, must be 'p' or 'j'"
            )
        asdict = {
            "type": self.type,
            "start": start,
            "end": end,
            "id": self.id,
            "features": self._features.to_dict(),
        }
        return asdict

    @staticmethod
    def from_dict(dictrepr, owner_set=None, **kwargs):
        """
        Create an annotation from its dictionary representation.

        The keys "start", "end", "type", "id" and "features" are looked up with
        `dict.get`, so a missing key is passed on to the constructor as None.

        Args:
          dictrepr: dictionary representation
          owner_set: the owning set the annotation should have (Default value = None)
          kwargs: ignored

        Returns:
          the new Annotation, with its owner set assigned to `owner_set`
        """
        fetch = dictrepr.get
        ann = Annotation(
            fetch("start"),
            fetch("end"),
            fetch("type"),
            annid=fetch("id"),
            features=fetch("features"),
        )
        ann._owner_set = owner_set
        return ann

    def __copy__(self):
        """
        Create a new Annotation with the same offsets, type and id; the features
        object of this annotation is passed to the new one as-is (no deep copy here).
        """
        return Annotation(
            self._start,
            self._end,
            self._type,
            features=self._features,
            annid=self._id,
        )

    def copy(self):
        """
        Return a shallow copy of the annotation as produced by `__copy__`
        (the features are not deep-copied).
        """
        shallow = self.__copy__()
        return shallow

    def __deepcopy__(self, memo=None):
        """
        Create a new Annotation with the same offsets, type and id and a deep copy of
        the dictionary representation of the features (None if there are no features).
        """
        fts = None
        if self._features is not None:
            fts = lib_copy.deepcopy(self._features.to_dict(), memo=memo)
        return Annotation(
            self._start,
            self._end,
            self._type,
            features=fts,
            annid=self._id,
        )

    def deepcopy(self, memo=None):
        """
        Return a deep copy of the annotation by delegating to the standard library
        `copy.deepcopy` (features and their values are copied as well).

        Args:
            memo: optional memo dictionary handed through to `copy.deepcopy`
        """
        clone = lib_copy.deepcopy(self, memo=memo)
        return clone

    def _update_offsets(self, start, end):
        """
        Overwrite the start and end offset of an existing annotation.

        THIS IS FOR INTERNAL USE ONLY! Library users should never call this method as
        it can cause many different, unexpected and hard to debug problems. It exists
        for very limited use by library-internal methods only. No validation of the new
        offsets is performed here.

        Args:
            start: new start offset
            end: new end offset
        """
        self._start, self._end = start, end

Static methods

def from_dict(dictrepr, owner_set=None, **kwargs)

Construct an annotation object from the dictionary representation.

Args

dictrepr
dictionary representation
owner_set
the owning set the annotation should have (Default value = None)
kwargs
ignored
Expand source code
@staticmethod
def from_dict(dictrepr, owner_set=None, **kwargs):
    """
    Create an annotation from its dictionary representation.

    The keys "start", "end", "type", "id" and "features" are looked up with
    `dict.get`, so a missing key is passed on to the constructor as None.

    Args:
      dictrepr: dictionary representation
      owner_set: the owning set the annotation should have (Default value = None)
      kwargs: ignored

    Returns:
      the new Annotation, with its owner set assigned to `owner_set`
    """
    fetch = dictrepr.get
    ann = Annotation(
        fetch("start"),
        fetch("end"),
        fetch("type"),
        annid=fetch("id"),
        features=fetch("features"),
    )
    ann._owner_set = owner_set
    return ann

Instance variables

var end

Returns the end offset.

Expand source code
@property
def end(self):
    """
    Return the end offset of the annotation (read-only view of `_end`).
    """
    return self._end
var features

Returns the features for the annotation.

Expand source code
@property
def features(self):
    """
    Return the features object of the annotation (the object itself, not a copy).
    """
    return self._features
var id

Returns the annotation id.

Expand source code
@property
def id(self):  # pylint: disable=C0103
    """
    Return the annotation id (read-only view of `_id`).
    """
    return self._id
var length : int

Returns the length of the annotation: this is the length of the offset span. Since the end offset is one after the last element, we return end-start. Note: this is deliberately not implemented as len(ann), as len(annset) returns the number of annotations in the set but annset.length() also returns the span length of the annotation set, so the method name for this is identical between annotations and annotation sets.

Expand source code
@property
def length(self) -> int:
    """
    Return the length of the annotation, i.e. the length of its offset span.

    Since the end offset is one after the last element, this is end - start.

    Note: this is deliberately not implemented as len(ann), as len(annset) returns the
    number of annotations in the set but annset.length() also returns the span length
    of the annotation set, so the name for this is identical between annotations and
    annotation sets.
    """
    return self.end - self.start
var span : Span

Returns a Span instance for this annotation.

Expand source code
@property
def span(self) -> Span:
    """
    Return a new Span instance built from the start and end offsets of this annotation.
    """
    return Span(self._start, self._end)
var start : int

Returns the start offset.

Expand source code
@property
def start(self) -> int:
    """
    Return the start offset of the annotation (read-only view of `_start`).
    """
    return self._start
var type : str

Returns the annotation type.

Expand source code
@property
def type(self) -> str:
    """
    Return the annotation type (read-only view of `_type`).
    """
    return self._type

Methods

def copy(self)

Return a shallow copy of the annotation (features are shared).

Expand source code
def copy(self):
    """
    Return a shallow copy of the annotation as produced by `__copy__`
    (the features are not deep-copied).
    """
    shallow = self.__copy__()
    return shallow
def deepcopy(self, memo=None)

Return a deep copy of the annotation (features and their values are copied as well).

Expand source code
def deepcopy(self, memo=None):
    """
    Return a deep copy of the annotation by delegating to the standard library
    `copy.deepcopy` (features and their values are copied as well).

    Args:
        memo: optional memo dictionary handed through to `copy.deepcopy`
    """
    clone = lib_copy.deepcopy(self, memo=memo)
    return clone
def equal(self, other)

Compare the content of this annotation with other. This returns True if all public attributes of the annotations excluding the annotation id (which is ignored) are equal.

Args

other
another object
Expand source code
def equal(self, other):
    """
    Compare the content of this annotation with other. This returns True if all public attributes
    of the annotations excluding the annotation id (which is ignored) are equal, i.e. the
    annotation type, the start and end offsets and the features.

    Args:
        other: another object

    Returns:
        True if `other` is an Annotation with equal type, offsets and features,
        False otherwise (including when `other` is not an Annotation)
    """
    if not isinstance(other, Annotation):
        return False
    # the type is a public attribute too: annotations of different types are never equal
    return (
        self.type == other.type
        and self.start == other.start
        and self.end == other.end
        and self.features == other.features
    )
def gap(self, start: int, end: int) ‑> int

Return the gap between this annotation and the other annotation. This is the distance between the last character of the first annotation and the first character of the second annotation in sequence, so it is always independent of the order of the two annotations.

This is negative if the annotations overlap.

Note: this can be called with an Annotation or AnnotationSet instead of start and end (see gatenlp._utils.support_annotation_or_set)

Args

start
start offset of span
end
end offset of span

Returns

size of gap

Expand source code
@support_annotation_or_set
def gap(self, start: int, end: int) -> int:
    """
    Return the gap between this annotation and the other annotation or span.

    The two are put in order of their start offsets and the result is the start
    offset of the later one minus the end offset of the earlier one, so it does not
    depend on which of the two this method is called on.

    This is negative if the annotations overlap.

    Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
        (see gatenlp._utils.support_annotation_or_set)

    Args:
        start: start offset of span
        end: end offset of span

    Returns:
      size of gap

    """
    if self.start < start:
        # this annotation starts first: measure from its end to the span's start
        return start - self.end
    # the span starts first (or both start at the same offset)
    return self.start - end
def isafter(self, start: int, end: int, immediately=False) ‑> bool

Checks if this annotation is after the other span, i.e. the start of this annotation is after the end of the other annotation or span.

Note: this can be called with an Annotation or AnnotationSet instead of start and end (see gatenlp._utils.support_annotation_or_set)

Args

start
start offset of the span
end
end offset of the span
immediately
if true checks if this annotation starts immediately after the other one (Default value = False)

Returns

True if after, False otherwise

Expand source code
@support_annotation_or_set
def isafter(self, start: int, end: int, immediately=False) -> bool:
    """
    Tell whether this annotation comes after the other span, i.e. this annotation
    starts at or after the end offset of the other annotation or span.

    Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
      (see gatenlp._utils.support_annotation_or_set)

    Args:
      start: start offset of the span (not used by the check)
      end: end offset of the span
      immediately: if true, require that this annotation starts exactly where the
          other one ends (Default value = False)

    Returns:
      True if after, False otherwise

    """
    return (self.start == end) if immediately else (self.start >= end)
def isbefore(self, start: int, end: int, immediately=False) ‑> bool

Checks if this annotation is before the other span, i.e. the end of this annotation is before the start of the other annotation or span.

Note: this can be called with an Annotation or AnnotationSet instead of start and end (see gatenlp._utils.support_annotation_or_set)

Args

start
start offset of the span
end
end offset of the span
immediately
if true checks if this annotation ends immediately before the other one (Default value = False)

Returns

True if before, False otherwise

Expand source code
@support_annotation_or_set
def isbefore(self, start: int, end: int, immediately=False) -> bool:
    """
    Tell whether this annotation comes before the other span, i.e. this annotation
    ends at or before the start offset of the other annotation or span.

    Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
      (see gatenlp._utils.support_annotation_or_set)

    Args:
        start: start offset of the span
        end: end offset of the span (not used by the check)
        immediately: if true, require that this annotation ends exactly where the
            other one starts (Default value = False)

    Returns:
      True if before, False otherwise

    """
    return (self.end == start) if immediately else (self.end <= start)
def iscoextensive(self, start: int, end: int) ‑> bool

Checks if this annotation is coextensive with the given span, annotation or annotation set, i.e. has exactly the same start and end offsets.

Note: this can be called with an Annotation or AnnotationSet instead of start and end (see gatenlp._utils.support_annotation_or_set)

Args

start
start offset of the span
end
end offset of the span

Returns

True if coextensive, False otherwise

Expand source code
@support_annotation_or_set
def iscoextensive(self, start: int, end: int) -> bool:
    """
    Tell whether this annotation has exactly the same start and end offsets as the
    given span, annotation or annotation set.

    Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
      (see gatenlp._utils.support_annotation_or_set)

    Args:
      start: start offset of the span
      end: end offset of the span

    Returns:
      `True` if both offsets match, `False` otherwise

    """
    if self.start != start:
        return False
    return self.end == end
def iscovering(self, start: int, end=None) ‑> bool

Checks if this annotation is covering the given span, annotation or annotation set, i.e. both the given start and end offsets are after the start of this annotation and before the end of this annotation.

If end is not given, then the method checks if start is an offset of a character contained in the span.

Note: this can be called with an Annotation or AnnotationSet instead of start and end (see gatenlp._utils.support_annotation_or_set)

Args

start
start offset of the span
end
end offset of the span

Returns

True if covering, False otherwise

Expand source code
@support_annotation_or_set
def iscovering(self, start: int, end=None) -> bool:
    """
    Tell whether this annotation covers the given span, annotation or annotation
    set, i.e. the given start is not before the start of this annotation and the
    given end is not after the end of this annotation.

    If end is not given, the method checks whether `start` is the offset of a
    character contained in this annotation; for a zero-length annotation only its
    own offset is regarded as covered.

    Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
      (see gatenlp._utils.support_annotation_or_set)

    Args:
      start: start offset of the span
      end: end offset of the span, or None to test the single offset `start`

    Returns:
      True if covering, False otherwise

    """
    if end is not None:
        return self.start <= start and self.end >= end
    if self.start == self.end:
        # zero-length annotation: it has no characters, so compare the position itself
        return start == self.start
    return self.start <= start < self.end
def isendingwith(self, start: int, end: int) ‑> bool

Checks if this annotation is ending at the same offset as the given span or annotation.

Args

start
start of the span (ignored)
end
end of the span

Returns

True if ending at the same offset as the span or annotation

Expand source code
@support_annotation_or_set
def isendingwith(self, start: int, end: int) -> bool:
    """
    Tell whether this annotation ends at the same offset as the given span or annotation.

    Args:
        start: start of the span (ignored)
        end: end of the span

    Returns:
        True if the end offset of this annotation equals `end`

    """
    return end == self._end
def isleftoverlapping(self, start: int, end: int) ‑> bool

Checks if this annotation is overlapping with the given span, annotation or annotation set on the left, i.e. the last character is inside the span and the first character is before the span.

Note: this can be called with an Annotation or AnnotationSet instead of start and end (see gatenlp._utils.support_annotation_or_set)

Args

start
start offset of the span
end
end offset of the span

Returns

True if left-overlapping, False otherwise

Expand source code
@support_annotation_or_set
def isleftoverlapping(self, start: int, end: int) -> bool:
    """
    Checks if this annotation is overlapping with the given span, annotation or
    annotation set on the left, i.e. the last character of this annotation is inside
    the span and this annotation does not start after the span starts.

    An annotation which ends at or before the start of the span does not overlap with
    it at all and is therefore not left-overlapping.

    Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
      (see gatenlp._utils.support_annotation_or_set)

    Args:
      start: start offset of the span
      end: end offset of the span

    Returns:
      `True` if left-overlapping, `False` otherwise

    """
    # `start < self.end` is what requires a real overlap: without it every annotation
    # located completely before the span would be reported as left-overlapping
    return self.start <= start < self.end <= end
def isoverlapping(self, start: int, end: int) ‑> bool

Checks if this annotation is overlapping with the given span, annotation or annotation set.

Note: this can be called with an Annotation or AnnotationSet instead of start and end (see gatenlp._utils.support_annotation_or_set)

Args

start
start offset of the span
end
end offset of the span

Returns

True if overlapping, False otherwise

Expand source code
@support_annotation_or_set
def isoverlapping(self, start: int, end: int) -> bool:
    """
    Checks if this annotation is overlapping with the given span, annotation or
    annotation set, i.e. if the two have at least one offset in common. This includes
    the case where the span completely encloses this annotation.

    Zero-length cases: a zero-length span overlaps if its offset is the offset of a
    character of this annotation; a zero-length annotation overlaps a non-empty span
    if its offset is the offset of a character of the span; two zero-length spans
    overlap if they are at the same offset.

    Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
      (see gatenlp._utils.support_annotation_or_set)

    Args:
      start: start offset of the span
      end: end offset of the span

    Returns:
      `True` if overlapping, `False` otherwise

    """
    if start == end:
        # zero-length span: treat it as the single position `start`
        if self.start == self.end:
            return self.start == start
        return self.start <= start < self.end
    if self.start == self.end:
        # zero-length annotation inside a non-empty span
        return start <= self.start < end
    # two non-empty half-open intervals overlap iff each starts before the other ends;
    # probing only the first and last offset of the span would miss the case where
    # the span strictly encloses this annotation
    return self.start < end and start < self.end
def isrightoverlapping(self, start: int, end: int) ‑> bool

Checks if this annotation is overlapping with the given span, annotation or annotation set on the right, i.e. the first character is inside the span.

Note: this can be called with an Annotation or AnnotationSet instead of start and end (see gatenlp._utils.support_annotation_or_set)

Args

start
start offset of the span
end
end offset of the span

Returns

True if right-overlapping, False otherwise

Expand source code
@support_annotation_or_set
def isrightoverlapping(self, start: int, end: int) -> bool:
    """
    Checks if this annotation is overlapping with the given span, annotation or
    annotation set on the right, i.e. the first character of this annotation is inside
    the span and this annotation does not end before the span ends.

    An annotation which starts at or after the end of the span does not overlap with
    it at all and is therefore not right-overlapping.

    Note: this can be called with an Annotation or AnnotationSet instead of `start` and `end`
      (see gatenlp._utils.support_annotation_or_set)

    Args:
      start: start offset of the span
      end: end offset of the span

    Returns:
      `True` if right-overlapping, `False` otherwise

    """
    # `self.start < end` is what requires a real overlap: without it every annotation
    # located completely after the span would be reported as right-overlapping
    return start <= self.start < end <= self.end
def isstartingat(self, start: int, end_: int) ‑> bool

Return True iff the annotation starts at the given start offset.

Args

start
start offset
end_
only present so the method can be used with anything that can be interpreted as a span (AnnotationSet, Span)
Expand source code
@support_annotation_or_set
def isstartingat(self, start: int, end_: int) -> bool:
    """
    Tell whether the annotation begins exactly at the given start offset.

    Args:
        start: start offset to compare with
        end_: unused; only present so the method accepts anything that can be
            interpreted as a span (AnnotationSet, Span)

    Returns:
        True iff the start offset of this annotation equals `start`
    """
    return start == self._start
def iswithin(self, start: int, end: int) ‑> bool

Checks if this annotation is within the given span, annotation or annotation set, i.e. both the start and end offsets of this annotation are after the given start and before the given end.

Note: this can be called with an Annotation or AnnotationSet instead of start and end (see gatenlp._utils.support_annotation_or_set)

Args

start
start offset of the span
end
end offset of the span

Returns

True if within, False otherwise

Expand source code
@support_annotation_or_set
def iswithin(self, start: int, end: int) -> bool:
    """
    Tell whether this annotation lies inside the given span, annotation or
    annotation set, i.e. it does not start before the given start and does not
    end after the given end (touching either boundary still counts as within).

    Note: an Annotation or AnnotationSet may be passed instead of `start` and `end`
      (see gatenlp._utils.support_annotation_or_set)

    Args:
      start: start offset of the span
      end: end offset of the span

    Returns:
      `True` if within, `False` otherwise

    """
    if self.start < start:
        return False
    return self.end <= end
def same(self, other)

Compare the content of this annotation with other. This returns True if all public attributes of the annotations including the annotation id are equal.

Args

other
another object
Expand source code
def same(self, other):
    """
    Compare the content of this annotation with other. This returns True if all public attributes
    of the annotations including the annotation id are equal, i.e. the id, the annotation
    type, the start and end offsets and the features.

    Args:
        other: another object

    Returns:
        True if `other` is an Annotation with equal id, type, offsets and features,
        False otherwise (including when `other` is not an Annotation)
    """
    if not isinstance(other, Annotation):
        return False
    # the type is a public attribute too: annotations of different types are never the same
    return (
        self.id == other.id
        and self.type == other.type
        and self.start == other.start
        and self.end == other.end
        and self.features == other.features
    )
def to_dict(self, offset_mapper=None, offset_type=None, **kwargs)

Return a representation of this annotation as a nested map. This representation is used for several serialization methods.

Args

offset_mapper
the offset mapper to use, must be specified if offset_type is specified.
offset_type
the offset type to be used for the conversion; must be specified if offset_mapper is specified

Returns

the dictionary representation of the Annotation

Expand source code
def to_dict(self, offset_mapper=None, offset_type=None, **kwargs):
    """
    Build the nested-map representation of this annotation that is used by several
    serialization methods.

    Args:
        offset_mapper: the offset mapper to use, must be specified if
            `offset_type` is specified.
        offset_type: the offset type to be used for the conversion, must be specified if
            `offset_mapper` is specified
        kwargs: accepted but not used by this method

    Returns:
        the dictionary representation of the Annotation with the keys
        "type", "start", "end", "id" and "features"

    Raises:
        Exception: if only one of `offset_mapper` and `offset_type` is given, or if
            `offset_type` is neither the Java nor the Python offset type
    """
    # the two arguments only make sense together
    if bool(offset_mapper) != bool(offset_type):
        raise Exception(
            "offset_mapper and offset_type must be specified both or none"
        )
    if offset_mapper is None:
        # no conversion requested: use the stored offsets unchanged
        start, end = self._start, self._end
    elif offset_type == OFFSET_TYPE_JAVA:
        start = offset_mapper.convert_to_java(self._start)
        end = offset_mapper.convert_to_java(self._end)
    elif offset_type == OFFSET_TYPE_PYTHON:
        start = offset_mapper.convert_to_python(self._start)
        end = offset_mapper.convert_to_python(self._end)
    else:
        raise Exception(
            f"Not a valid offset type: {offset_type}, must be 'p' or 'j'"
        )
    asdict = {
        "type": self.type,
        "start": start,
        "end": end,
        "id": self.id,
        "features": self._features.to_dict(),
    }
    return asdict