diff --git a/src/neo4j_graphrag/experimental/components/text_splitters/__init__.py b/src/neo4j_graphrag/experimental/components/text_splitters/__init__.py
index e69de29bb..d128c3410 100644
--- a/src/neo4j_graphrag/experimental/components/text_splitters/__init__.py
+++ b/src/neo4j_graphrag/experimental/components/text_splitters/__init__.py
@@ -0,0 +1,23 @@
+#  Copyright (c) "Neo4j"
+#  Neo4j Sweden AB [https://neo4j.com]
+#  #
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#  #
+#      https://www.apache.org/licenses/LICENSE-2.0
+#  #
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+# HierarchicalTextSplitter imports SpaCy lazily (inside ``_load_spacy``), so
+# importing the module needs no optional dependency.  Import it unguarded:
+# a swallowed ImportError would only hide a genuinely broken installation.
+from neo4j_graphrag.experimental.components.text_splitters.hierarchical_splitter import (
+    HierarchicalTextSplitter,
+)
+
+__all__ = ["HierarchicalTextSplitter"]
diff --git a/src/neo4j_graphrag/experimental/components/text_splitters/hierarchical_splitter.py b/src/neo4j_graphrag/experimental/components/text_splitters/hierarchical_splitter.py
new file mode 100644
index 000000000..32918377e
--- /dev/null
+++ b/src/neo4j_graphrag/experimental/components/text_splitters/hierarchical_splitter.py
@@ -0,0 +1,452 @@
+#  Copyright (c) "Neo4j"
+#  Neo4j Sweden AB [https://neo4j.com]
+#  #
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#  #
+#      https://www.apache.org/licenses/LICENSE-2.0
+#  #
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""Hierarchical text splitter that detects section boundaries before chunking.
+
+Supported header strategies:
+
+* ``"markdown"`` — ATX Markdown header lines (``# H1``, ``## H2``, … up to ``###### H6``): one to six ``#`` characters followed by a space and header text.
+* ``"capitalization"`` — short Title Case or ALL_CAPS lines without terminal
+  punctuation (appropriate for plain-text output from loaders like
+  LiteParseLoader).
+* ``"blank_line"`` — short lines that are surrounded by blank lines on both
+  sides (a common plain-text section marker).
+* ``"spacy_verbless"`` — SpaCy-parsed sentences that are short, contain no
+  verb, and are followed by a longer sentence.
+
+All strategies produce a list of *sections* (contiguous text blocks).  Each
+section is then emitted as a single chunk when it fits within ``max_chunk_size``,
+or cut into overlapping windows (``chunk_overlap``) when it is larger.
+
+Optionally, when ``drop_verbless_sentences=True`` (default), SpaCy is used to
+remove sentences with no verb token from every emitted chunk.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any, Optional
+
+from pydantic import validate_call
+
+from neo4j_graphrag.experimental.components.text_splitters.base import TextSplitter
+from neo4j_graphrag.experimental.components.types import TextChunk, TextChunks
+
+# ---------------------------------------------------------------------------
+# Header-detection helpers
+# ---------------------------------------------------------------------------
+
+_MARKDOWN_HEADER_RE = re.compile(r"^#{1,6}\s+\S", re.MULTILINE)
+
+# Title Case: starts with an ASCII capital, no terminal punctuation at end of
+# line, short (≤ 79 chars), and at least half the words start with a capital.
+_TITLECASE_RE = re.compile(r"^[A-Z][^\n]{0,78}$")
+_TERMINAL_PUNCT_RE = re.compile(r"[.!?;,]$")
+
+# ALL_CAPS line (≥ 2 chars): ASCII uppercase letters, digits, spaces/tabs, "-" or ":"
+_ALLCAPS_RE = re.compile(r"^[A-Z0-9][A-Z0-9 \t\-:]+$")
+
+# Blank-line boundary: a line that is non-empty, short, and preceded/followed
+# by blank lines (handled at section-split level, not a single regex).
+_SHORT_LINE_MAX = 80
+
+
+def _is_title_case(line: str) -> bool:
+    """Tell whether *line* reads as a Title Case heading.
+
+    The line must match ``_TITLECASE_RE``, must not end in terminal
+    punctuation, and at least half of its whitespace-separated words must
+    start with an uppercase character.
+    """
+    if _TITLECASE_RE.match(line) is None or _TERMINAL_PUNCT_RE.search(line):
+        return False
+    tokens = line.split()
+    return bool(tokens) and 2 * sum(t[0].isupper() for t in tokens) >= len(tokens)
+
+
+def _is_allcaps(line: str) -> bool:
+    """Tell whether *line* matches ``_ALLCAPS_RE`` and has no terminal punctuation."""
+    matches_caps = _ALLCAPS_RE.match(line) is not None
+    ends_in_punct = _TERMINAL_PUNCT_RE.search(line) is not None
+    return matches_caps and not ends_in_punct
+
+
+def _split_at_markdown_headers(text: str) -> list[str]:
+    """Split *text* before each ATX header line; skip ``#`` lines in code fences."""
+    sections: list[str] = []
+    current: list[str] = []
+    in_fence = False  # True between an opening and closing ``` / ~~~ fence line
+    for line in text.splitlines(keepends=True):
+        if line.lstrip().startswith(("```", "~~~")):
+            in_fence = not in_fence
+        elif not in_fence and current and _MARKDOWN_HEADER_RE.match(line):
+            sections.append("".join(current))
+            current = []
+        current.append(line)
+    if current:
+        sections.append("".join(current))
+    return [s for s in sections if s.strip()]
+
+
+def _split_at_capitalization(text: str) -> list[str]:
+    """Cut *text* into sections, opening a new one at each heading-like line.
+
+    A heading is a non-blank line of at most ``_SHORT_LINE_MAX`` characters
+    whose stripped form passes ``_is_title_case`` or ``_is_allcaps``.
+    """
+    groups: list[list[str]] = [[]]
+    for raw in text.splitlines(keepends=True):
+        body = raw.rstrip("\r\n")
+        core = body.strip()
+        heading = (
+            bool(core)
+            and len(body) <= _SHORT_LINE_MAX
+            and (_is_title_case(core) or _is_allcaps(core))
+        )
+        if heading and groups[-1]:
+            groups.append([])
+        groups[-1].append(raw)
+    sections = ["".join(group) for group in groups]
+    return [section for section in sections if section.strip()]
+
+
+def _split_at_blank_line(text: str) -> list[str]:
+    """Cut *text* into sections at short, isolated heading-like lines.
+
+    A line opens a new section when all of the following hold:
+
+    * Its stripped form is non-empty and at most ``_SHORT_LINE_MAX`` characters.
+    * Its stripped form does not match ``_TERMINAL_PUNCT_RE``.
+    * It has at most 6 whitespace-separated words.
+    * The previous line is blank, or it is the first line of *text*.
+    * The next line is blank, or it is the last line of *text*.
+
+    Sections keep their original line endings; whitespace-only sections are
+    discarded from the result.
+    """
+    lines = text.splitlines(keepends=True)
+    last = len(lines) - 1
+
+    def is_blank(pos: int) -> bool:
+        # Positions before the first or after the last line count as blank.
+        return pos < 0 or pos > last or not lines[pos].strip()
+
+    # ``groups[-1]`` collects the lines of the section being built; a fresh
+    # list is pushed whenever a heading line closes the previous section.
+    groups: list[list[str]] = [[]]
+    for pos, raw in enumerate(lines):
+        core = raw.rstrip("\r\n").strip()
+        heading = (
+            bool(core)
+            and len(core) <= _SHORT_LINE_MAX
+            and _TERMINAL_PUNCT_RE.search(core) is None
+            and len(core.split()) <= 6
+            and is_blank(pos - 1)
+            and is_blank(pos + 1)
+        )
+        # A heading seen before anything was collected just starts the first
+        # section instead of closing an empty one.
+        if heading and groups[-1]:
+            groups.append([])
+        groups[-1].append(raw)
+
+    # Reassemble each group of lines and drop whitespace-only sections.
+    sections = ["".join(group) for group in groups]
+    return [section for section in sections if section.strip()]
+
+
+def _split_at_spacy_verbless(text: str, nlp: Any) -> list[str]:
+    """Cut *text* into sections at verbless heading sentences found by SpaCy.
+
+    *nlp* is called on *text* and the sentences of the resulting doc
+    (``doc.sents``) are scanned.  A sentence opens a new section when:
+
+    * Its stripped text has at most ``_SHORT_LINE_MAX`` characters.
+    * None of its tokens has ``pos_`` equal to ``VERB`` or ``AUX``.
+    * A next sentence exists and its stripped text is longer than
+      ``_SHORT_LINE_MAX`` characters.
+
+    Sentences of one section are joined with a single space.  When the doc
+    yields no sentences, *text* itself is the only section (none if blank).
+    """
+    sents = list(nlp(text).sents)
+    if not sents:
+        # Nothing to scan: fall back to the raw text unless it is blank.
+        return [text] if text.strip() else []
+
+    def stripped_len(pos: int) -> int:
+        return len(sents[pos].text.strip())
+
+    def opens_section(pos: int) -> bool:
+        # Headings are short ...
+        if stripped_len(pos) > _SHORT_LINE_MAX:
+            return False
+        # ... contain no verb or auxiliary ...
+        if any(tok.pos_ in ("VERB", "AUX") for tok in sents[pos]):
+            return False
+        # ... and introduce a longer sentence.
+        return pos + 1 < len(sents) and stripped_len(pos + 1) > _SHORT_LINE_MAX
+
+    # ``groups[-1]`` collects the sentence texts of the section being built;
+    # a fresh list is pushed whenever a heading closes the previous section.
+    groups: list[list[str]] = [[]]
+    for pos, sent in enumerate(sents):
+        # The first sentence never closes a section, even if it is a heading.
+        if opens_section(pos) and groups[-1]:
+            groups.append([])
+        groups[-1].append(sent.text)
+
+    # Join each section's sentences and drop whitespace-only sections.
+    sections = [" ".join(group) for group in groups]
+    return [section for section in sections if section.strip()]
+
+
+# ---------------------------------------------------------------------------
+# Overlap-based character splitter (for sections larger than max_chunk_size)
+# ---------------------------------------------------------------------------
+
+
+def _split_with_overlap(text: str, max_size: int, overlap: int) -> list[str]:
+    """Split *text* into chunks of at most *max_size* characters.
+
+    Consecutive chunks share exactly *overlap* characters (clamped to
+    ``0 <= overlap < max_size``), so no part of *text* is ever dropped.  A
+    chunk is shortened to end right after a whitespace character when one
+    exists beyond its overlap prefix, so words are not cut where avoidable.
+
+    Args:
+        text: The text to split; an empty string yields ``[]``.
+        max_size: Maximum chunk length in characters.
+        overlap: Characters repeated at the start of the next chunk.
+
+    Raises:
+        ValueError: If *max_size* is not strictly positive.
+    """
+    if max_size <= 0:
+        raise ValueError("max_size must be strictly greater than 0")
+    overlap = max(0, min(overlap, max_size - 1))
+    chunks: list[str] = []
+    start, length = 0, len(text)
+    while start < length:
+        end = min(start + max_size, length)
+        if end < length:
+            # Look for a whitespace cut, but never inside the overlap prefix:
+            # the next chunk starts at ``end - overlap`` and must move forward.
+            floor, cut = start + overlap, end
+            while cut > floor and not text[cut - 1].isspace():
+                cut -= 1
+            if cut > floor:
+                end = cut
+        chunks.append(text[start:end])
+        if end >= length:
+            break  # Text exhausted; a further chunk would only repeat the tail.
+        start = end - overlap
+    return chunks
+
+
+# ---------------------------------------------------------------------------
+# Verb-filter using SpaCy
+# ---------------------------------------------------------------------------
+
+
+def _drop_verbless_sentences(text: str, nlp: Any) -> str:
+    """Return *text* reduced to the sentences that contain a verb.
+
+    *nlp* is called on *text*; each sentence in ``doc.sents`` is kept only if
+    at least one of its tokens has ``pos_`` equal to ``VERB`` or ``AUX``.
+    The surviving sentence texts are joined with a single space.
+    """
+    verbal_tags = ("VERB", "AUX")
+    return " ".join(
+        sentence.text
+        for sentence in nlp(text).sents
+        if any(token.pos_ in verbal_tags for token in sentence)
+    )
+
+
+# ---------------------------------------------------------------------------
+# Valid ``header_strategy`` values accepted by HierarchicalTextSplitter
+# ---------------------------------------------------------------------------
+
+_VALID_STRATEGIES = frozenset(
+    {"markdown", "capitalization", "blank_line", "spacy_verbless"}
+)
+
+
+# ---------------------------------------------------------------------------
+# Main component
+# ---------------------------------------------------------------------------
+
+
+class HierarchicalTextSplitter(TextSplitter):
+    """Splits text by first detecting section boundaries then chunking each section.
+
+    Args:
+        max_chunk_size (int): Maximum number of characters per output chunk.
+            Defaults to 2048.
+        chunk_overlap (int): Characters of overlap between consecutive chunks
+            when a section must be further split.  Must be less than
+            ``max_chunk_size``.  Defaults to 200.
+        header_strategy (str): How to detect section boundaries.  One of:
+
+            * ``"markdown"`` — Markdown ATX header lines (``#``, ``##``, …).
+            * ``"capitalization"`` — short Title Case or ALL_CAPS lines without
+              terminal punctuation.
+            * ``"blank_line"`` — short lines surrounded by blank lines on both
+              sides.
+            * ``"spacy_verbless"`` — SpaCy-detected short verbless sentences
+              that precede a longer sentence.
+
+        model (str): SpaCy model name loaded when *header_strategy* is
+            ``"spacy_verbless"`` or *drop_verbless_sentences* is ``True``.
+            Defaults to ``"en_core_web_sm"``.
+        drop_verbless_sentences (bool): When ``True`` (default), SpaCy is used
+            to remove verbless sentences from every emitted chunk; a chunk in
+            which no sentence survives is kept unfiltered.  The default loads
+            SpaCy at construction time regardless of *header_strategy* —
+            install ``neo4j-graphrag[nlp]`` when using it, or explicitly pass
+            ``drop_verbless_sentences=False`` to avoid the SpaCy dependency.
+
+    Example:
+
+    .. code-block:: python
+
+        from neo4j_graphrag.experimental.components.text_splitters.hierarchical_splitter import (
+            HierarchicalTextSplitter,
+        )
+        from neo4j_graphrag.experimental.pipeline import Pipeline
+
+        pipeline = Pipeline()
+        splitter = HierarchicalTextSplitter(
+            max_chunk_size=2048,
+            chunk_overlap=200,
+            header_strategy="markdown",
+        )
+        pipeline.add_component(splitter, "text_splitter")
+    """
+
+    @validate_call
+    def __init__(
+        self,
+        max_chunk_size: int = 2048,
+        chunk_overlap: int = 200,
+        header_strategy: str = "markdown",
+        model: str = "en_core_web_sm",
+        drop_verbless_sentences: bool = True,
+    ) -> None:
+        if max_chunk_size <= 0:
+            raise ValueError("max_chunk_size must be strictly greater than 0")
+        if chunk_overlap < 0:
+            raise ValueError("chunk_overlap must be >= 0")
+        if chunk_overlap >= max_chunk_size:
+            raise ValueError("chunk_overlap must be strictly less than max_chunk_size")
+        if header_strategy not in _VALID_STRATEGIES:
+            raise ValueError(
+                f"header_strategy must be one of {sorted(_VALID_STRATEGIES)}, "
+                f"got {header_strategy!r}"
+            )
+
+        self.max_chunk_size = max_chunk_size
+        self.chunk_overlap = chunk_overlap
+        self.header_strategy = header_strategy
+        self.model = model
+        self.drop_verbless_sentences = drop_verbless_sentences
+
+        # Load SpaCy here, and only when needed, so a missing package or model fails early.
+        self._nlp: Optional[Any] = None
+        needs_spacy = header_strategy == "spacy_verbless" or drop_verbless_sentences
+        if needs_spacy:
+            self._nlp = self._load_spacy(model)
+
+    @staticmethod
+    def _load_spacy(model: str) -> Any:
+        """Load SpaCy *model*; ImportError if SpaCy is missing, ValueError if the model is."""
+        try:
+            import spacy  # noqa: PLC0415
+        except ImportError as exc:
+            raise ImportError(
+                "SpaCy is required for this configuration of HierarchicalTextSplitter. "
+                "Install it with: pip install 'neo4j-graphrag[nlp]'"
+            ) from exc
+        try:
+            return spacy.load(model)
+        except OSError as exc:
+            raise ValueError(
+                f"SpaCy model {model!r} is not installed. "
+                f"Download it with: python -m spacy download {model}"
+            ) from exc
+
+    def _detect_sections(self, text: str) -> list[str]:
+        """Split *text* into section strings using the configured ``header_strategy``."""
+        strategy = self.header_strategy
+        if strategy == "markdown":
+            sections = _split_at_markdown_headers(text)
+        elif strategy == "capitalization":
+            sections = _split_at_capitalization(text)
+        elif strategy == "blank_line":
+            sections = _split_at_blank_line(text)
+        else:  # "spacy_verbless"
+            if self._nlp is None:
+                raise RuntimeError(
+                    "SpaCy model not loaded for 'spacy_verbless' strategy; this is a bug"
+                )
+            sections = _split_at_spacy_verbless(text, self._nlp)
+
+        # Fallback: if no sections were detected, treat the whole text as one.
+        if not sections:
+            sections = [text] if text.strip() else []
+        return sections
+
+    def _chunk_section(self, section_text: str) -> list[str]:
+        """Return *section_text* whole if it fits ``max_chunk_size``, else overlap-split it."""
+        if len(section_text) <= self.max_chunk_size:
+            return [section_text]
+        return _split_with_overlap(
+            section_text, self.max_chunk_size, self.chunk_overlap
+        )
+
+    def _filter_verbless(self, text: str) -> str:
+        """Drop verbless sentences; return *text* as-is if disabled or nothing survives."""
+        if not self.drop_verbless_sentences or self._nlp is None:
+            return text
+        filtered = _drop_verbless_sentences(text, self._nlp)
+        # Fall back to original text when filtering removes everything.
+        return filtered if filtered.strip() else text
+
+    @validate_call
+    async def run(self, text: str) -> TextChunks:
+        """Split *text* into hierarchical chunks.
+
+        Args:
+            text (str): The text to be split.
+
+        Returns:
+            TextChunks: Chunks indexed sequentially from 0; whitespace-only
+            chunks are skipped, and blank *text* yields no chunks at all.
+        """
+        if not text.strip():
+            return TextChunks(chunks=[])
+
+        sections = self._detect_sections(text)
+        raw_chunks: list[str] = []
+        for section in sections:
+            raw_chunks.extend(self._chunk_section(section))
+
+        chunks: list[TextChunk] = []
+        for raw in raw_chunks:
+            filtered = self._filter_verbless(raw)
+            if filtered.strip():
+                chunks.append(TextChunk(text=filtered, index=len(chunks)))
+
+        return TextChunks(chunks=chunks)
diff --git a/tests/unit/experimental/components/text_splitters/test_hierarchical_splitter.py b/tests/unit/experimental/components/text_splitters/test_hierarchical_splitter.py
new file mode 100644
index 000000000..ba06e02d6
--- /dev/null
+++ b/tests/unit/experimental/components/text_splitters/test_hierarchical_splitter.py
@@ -0,0 +1,524 @@
+#  Copyright (c) "Neo4j"
+#  Neo4j Sweden AB [https://neo4j.com]
+#  #
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#  #
+#      https://www.apache.org/licenses/LICENSE-2.0
+#  #
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""Unit tests for HierarchicalTextSplitter.
+
+SpaCy is not downloaded in these tests.  Where the splitter would normally
+load a model (`drop_verbless_sentences=True` or `header_strategy="spacy_verbless"`),
+the tests patch `spacy.load` with a lightweight fake nlp object so that no
+network access or model installation is required.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# Skip the entire module when spaCy is not installed at all.
+spacy = pytest.importorskip("spacy")
+
+from neo4j_graphrag.experimental.components.text_splitters.hierarchical_splitter import (  # noqa: E402
+    HierarchicalTextSplitter,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers for building fake SpaCy objects without a real model
+# ---------------------------------------------------------------------------
+
+
+def _make_fake_token(text: str, pos: str) -> MagicMock:
+    """Build a stand-in for a spaCy Token.
+
+    Only ``text`` and ``pos_`` are set explicitly.
+    """
+    return MagicMock(text=text, pos_=pos)
+
+
+def _make_fake_sent(text: str, tokens: list[MagicMock]) -> MagicMock:
+    """Build a stand-in for a spaCy sentence Span.
+
+    ``__iter__`` is wired through ``side_effect`` rather than ``return_value``
+    so that every ``iter(sent)`` call hands out a new iterator over *tokens*.
+    A single shared iterator would be exhausted after the first pass, which
+    matters as soon as the sentence is iterated more than once (for example
+    across several ``run()`` calls).
+    """
+    fake = MagicMock(text=text)
+    fake.__iter__ = MagicMock(side_effect=lambda: iter(tokens))
+    return fake
+
+
+def _make_fake_doc(sentences: list[MagicMock]) -> MagicMock:
+    """Build a stand-in for a spaCy Doc whose ``sents`` is *sentences*."""
+    no_tokens = MagicMock(return_value=iter([]))
+    fake = MagicMock(sents=sentences)
+    fake.__iter__ = no_tokens
+    return fake
+
+
+def _make_nlp_returning_doc(sentences: list[MagicMock]) -> MagicMock:
+    """Build a fake spaCy pipeline: calling it returns one fixed fake Doc."""
+    # The same Doc is returned whatever text the pipeline is called with.
+    fixed_doc = _make_fake_doc(sentences)
+    return MagicMock(return_value=fixed_doc)
+
+
+# ---------------------------------------------------------------------------
+# Tests: header_strategy="markdown"
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_markdown_two_sections_produce_two_chunks() -> None:
+    """Each of two Markdown headers starts its own chunk, in document order."""
+    document = "# Introduction\nThis is the intro section.\n# Conclusion\nThis is the conclusion."
+    chunker = HierarchicalTextSplitter(
+        header_strategy="markdown",
+        drop_verbless_sentences=False,
+        chunk_overlap=0,
+        max_chunk_size=2048,
+    )
+    chunks = (await chunker.run(document)).chunks
+    assert len(chunks) == 2
+    assert "Introduction" in chunks[0].text
+    assert "Conclusion" in chunks[1].text
+
+
+@pytest.mark.asyncio
+async def test_markdown_single_section_produces_one_chunk() -> None:
+    """Header-free text comes back as exactly one chunk."""
+    document = "No headers here. Just a single paragraph of text."
+    chunker = HierarchicalTextSplitter(
+        header_strategy="markdown",
+        drop_verbless_sentences=False,
+        chunk_overlap=0,
+        max_chunk_size=2048,
+    )
+    chunks = (await chunker.run(document)).chunks
+    assert len(chunks) == 1
+
+
+@pytest.mark.asyncio
+async def test_markdown_three_sections_sequential_indices() -> None:
+    """Three Markdown sections yield three chunks numbered 0, 1, 2."""
+    document = "# A\nSection A body.\n# B\nSection B body.\n# C\nSection C body."
+    chunker = HierarchicalTextSplitter(
+        header_strategy="markdown",
+        drop_verbless_sentences=False,
+        chunk_overlap=0,
+        max_chunk_size=2048,
+    )
+    chunks = (await chunker.run(document)).chunks
+    assert len(chunks) == 3
+    indices = [chunk.index for chunk in chunks]
+    assert indices == list(range(len(chunks)))
+
+
+# ---------------------------------------------------------------------------
+# Tests: header_strategy="capitalization"
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_capitalization_allcaps_header_splits() -> None:
+    """An ALL_CAPS line with no terminal punctuation starts a new section."""
+    document = "INTRODUCTION\nThis paragraph describes the introduction.\nCONCLUSION\nThis paragraph wraps things up."
+    chunker = HierarchicalTextSplitter(
+        header_strategy="capitalization",
+        drop_verbless_sentences=False,
+        chunk_overlap=0,
+        max_chunk_size=2048,
+    )
+    chunks = (await chunker.run(document)).chunks
+    assert len(chunks) == 2
+    assert "INTRODUCTION" in chunks[0].text
+    assert "CONCLUSION" in chunks[1].text
+
+
+@pytest.mark.asyncio
+async def test_capitalization_title_case_header_splits() -> None:
+    """A Title Case line with no terminal punctuation starts a new section."""
+    document = "First Section Title\nContent for the first section.\nSecond Section Title\nContent for the second section."
+    chunker = HierarchicalTextSplitter(
+        header_strategy="capitalization",
+        drop_verbless_sentences=False,
+        chunk_overlap=0,
+        max_chunk_size=2048,
+    )
+    chunks = (await chunker.run(document)).chunks
+    assert len(chunks) == 2
+
+
+@pytest.mark.asyncio
+async def test_capitalization_indices_sequential() -> None:
+    """Three ALL_CAPS sections yield exactly three chunks indexed 0, 1, 2."""
+    text = "PART ONE\nBody of part one.\nPART TWO\nBody of part two.\nPART THREE\nBody of part three."
+    splitter = HierarchicalTextSplitter(
+        max_chunk_size=2048,
+        chunk_overlap=0,
+        header_strategy="capitalization",
+        drop_verbless_sentences=False,
+    )
+    result = await splitter.run(text)
+    # Pin the count too: an enumerate() loop over zero chunks would pass vacuously.
+    assert [chunk.index for chunk in result.chunks] == [0, 1, 2]
+
+
+# ---------------------------------------------------------------------------
+# Tests: header_strategy="blank_line"
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_blank_line_short_surrounded_header_splits() -> None:
+    """A short line with a blank line on each side starts a new section."""
+    document = "\nOverview\n\nThis section covers the overview of the system.\n\nDetails\n\nThis section covers the fine-grained details."
+    chunker = HierarchicalTextSplitter(
+        header_strategy="blank_line",
+        drop_verbless_sentences=False,
+        chunk_overlap=0,
+        max_chunk_size=2048,
+    )
+    chunks = (await chunker.run(document)).chunks
+    assert len(chunks) == 2
+    assert "Overview" in chunks[0].text
+    assert "Details" in chunks[1].text
+
+
+@pytest.mark.asyncio
+async def test_blank_line_indices_sequential() -> None:
+    """Two blank-line-delimited sections yield exactly two chunks indexed 0, 1."""
+    text = "\nPart A\n\nContent for part A goes here.\n\nPart B\n\nContent for part B goes here."
+    splitter = HierarchicalTextSplitter(
+        max_chunk_size=2048,
+        chunk_overlap=0,
+        header_strategy="blank_line",
+        drop_verbless_sentences=False,
+    )
+    result = await splitter.run(text)
+    # Pin the count too: an enumerate() loop over zero chunks would pass vacuously.
+    assert [chunk.index for chunk in result.chunks] == [0, 1]
+
+
+# ---------------------------------------------------------------------------
+# Tests: overlap splitting for large sections
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_large_section_split_with_overlap() -> None:
+    """A section larger than max_chunk_size is split into multiple chunks
+    with the last N characters of chunk K equal to the first N characters
+    of chunk K+1.
+
+    The text has no Markdown header, so the "markdown" strategy treats it as
+    a single section, and it has no whitespace, so every cut falls exactly at
+    max_chunk_size.  drop_verbless_sentences=False keeps SpaCy out of the
+    picture.
+    """
+    # The body cycles through the alphabet rather than repeating one
+    # character: with a uniform body such as "x" * 500 every tail equals every
+    # head, so the overlap assertion below could never fail.
+    # With max_size=50, overlap=20 the window advances by 30 characters:
+    #   chunk 0: body[0:50]
+    #   chunk 1: body[30:80], so the shared part is body[30:50]
+    # A misplaced window is detected unless it is off by a multiple of 26.
+    body = "".join(chr(ord("a") + i % 26) for i in range(500))
+    overlap = 20
+    max_size = 50
+    splitter = HierarchicalTextSplitter(
+        max_chunk_size=max_size,
+        chunk_overlap=overlap,
+        header_strategy="markdown",
+        drop_verbless_sentences=False,
+    )
+    result = await splitter.run(body)
+    chunks = result.chunks
+    assert len(chunks) > 1
+
+    # Verify overlap: last `overlap` chars of chunk[k] == first `overlap` chars of chunk[k+1].
+    for k in range(len(chunks) - 1):
+        tail = chunks[k].text[-overlap:]
+        head = chunks[k + 1].text[:overlap]
+        assert tail == head, (
+            f"Overlap mismatch between chunk {k} and {k + 1}: "
+            f"tail={tail!r}, head={head!r}"
+        )
+
+
+@pytest.mark.asyncio
+async def test_small_section_emitted_as_single_chunk() -> None:
+    """A section that fits within max_chunk_size comes back as one whole chunk."""
+    document = "# Tiny\nShort body."
+    chunker = HierarchicalTextSplitter(
+        header_strategy="markdown",
+        drop_verbless_sentences=False,
+        chunk_overlap=0,
+        max_chunk_size=2048,
+    )
+    chunks = (await chunker.run(document)).chunks
+    assert len(chunks) == 1
+    assert "Tiny" in chunks[0].text
+    assert "Short body" in chunks[0].text
+
+
+@pytest.mark.asyncio
+async def test_overlap_chunk_indices_sequential() -> None:
+    """Chunks produced by size-splitting one section are numbered 0, 1, 2, …"""
+    filler = "y" * 300
+    document = f"# Big Section\n{filler}"
+    chunker = HierarchicalTextSplitter(
+        header_strategy="markdown",
+        drop_verbless_sentences=False,
+        chunk_overlap=10,
+        max_chunk_size=60,
+    )
+    chunks = (await chunker.run(document)).chunks
+    assert len(chunks) > 1
+    indices = [chunk.index for chunk in chunks]
+    assert indices == list(range(len(chunks)))
+
+
+# ---------------------------------------------------------------------------
+# Tests: drop_verbless_sentences=True (SpaCy mocked)
+# ---------------------------------------------------------------------------
+
+
+def _make_spacy_nlp_with_verbless() -> MagicMock:
+    """Build a fake nlp() whose Doc holds one verbless and one verbal sentence.
+
+    * "No verb here" — tagged DET NOUN ADV, so it has no VERB/AUX token.
+    * "The dog runs fast" — tagged DET NOUN VERB ADV, so it has one VERB.
+    """
+    tagged_sentences = [
+        # (word, POS tag) pairs of the verbless sentence
+        [("No", "DET"), ("verb", "NOUN"), ("here", "ADV")],
+        # (word, POS tag) pairs of the sentence containing a VERB
+        [("The", "DET"), ("dog", "NOUN"), ("runs", "VERB"), ("fast", "ADV")],
+    ]
+
+    # Each sentence's text is its words joined by single spaces; its tokens
+    # carry the word and the POS tag listed above.
+    fake_sents = [
+        _make_fake_sent(
+            " ".join(word for word, _ in tagged),
+            [_make_fake_token(word, tag) for word, tag in tagged],
+        )
+        for tagged in tagged_sentences
+    ]
+
+    return _make_nlp_returning_doc(fake_sents)
+
+
+@pytest.mark.asyncio
+async def test_drop_verbless_removes_verbless_sentence() -> None:
+    """With drop_verbless_sentences=True a sentence lacking a verb is dropped."""
+    pipeline = _make_spacy_nlp_with_verbless()
+
+    with patch("spacy.load", return_value=pipeline):
+        chunker = HierarchicalTextSplitter(
+            header_strategy="markdown",
+            drop_verbless_sentences=True,
+            model="en_core_web_sm",
+            chunk_overlap=0,
+            max_chunk_size=2048,
+        )
+
+    document = "# Section\nNo verb here. The dog runs fast."
+    chunks = (await chunker.run(document)).chunks
+
+    assert len(chunks) == 1
+    only_text = chunks[0].text
+    # The verbal sentence survives the filter; the verbless one does not.
+    assert "The dog runs fast" in only_text
+    assert "No verb here" not in only_text
+
+
+@pytest.mark.asyncio
+async def test_drop_verbless_keeps_verbal_sentences() -> None:
+    """A lone sentence that contains a VERB token is left in the chunk text."""
+    tagged_words = [("She", "PRON"), ("walks", "VERB"), ("home", "NOUN")]
+    verbal_sent = _make_fake_sent(
+        "She walks home",
+        [_make_fake_token(word, pos) for word, pos in tagged_words],
+    )
+    stub_nlp = _make_nlp_returning_doc([verbal_sent])
+
+    with patch("spacy.load", return_value=stub_nlp):
+        splitter = HierarchicalTextSplitter(
+            header_strategy="markdown",
+            max_chunk_size=2048,
+            chunk_overlap=0,
+            drop_verbless_sentences=True,
+            model="en_core_web_sm",
+        )
+
+    output = await splitter.run("# Section\nShe walks home.")
+    chunks = output.chunks
+
+    # The single verbal sentence must still be present in the only chunk.
+    assert len(chunks) == 1
+    assert "She walks home" in chunks[0].text
+
+
+# ---------------------------------------------------------------------------
+# Tests: chunk index sequencing across multiple sections
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_indices_sequential_across_multiple_sections() -> None:
+    """Three markdown sections give three chunks whose indices run 0, 1, 2."""
+    document = (
+        "# Alpha\nFirst section body.\n"
+        "# Beta\nSecond section body.\n"
+        "# Gamma\nThird section body."
+    )
+    splitter = HierarchicalTextSplitter(
+        header_strategy="markdown",
+        max_chunk_size=2048,
+        chunk_overlap=0,
+        drop_verbless_sentences=False,
+    )
+    chunks = (await splitter.run(document)).chunks
+    assert len(chunks) == 3
+    # Indices are global: each chunk's index equals its position in the list.
+    assert [chunk.index for chunk in chunks] == list(range(len(chunks)))
+
+
+# ---------------------------------------------------------------------------
+# Tests: header_strategy="spacy_verbless"
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_spacy_verbless_strategy_splits_at_verbless_heading() -> None:
+    """Check header_strategy="spacy_verbless" against a mocked SpaCy pipeline.
+
+    The fake doc alternates one-word verbless sentences with long sentences
+    whose tokens include a VERB; the test expects one chunk per verbless
+    sentence.  spacy.load is patched, so no model is downloaded.
+    """
+    # First heading candidate: a lone NOUN token, so no verb at all.
+    intro_tokens = [_make_fake_token("Introduction", "NOUN")]
+    intro_sent = _make_fake_sent("Introduction", intro_tokens)
+
+    # First body: long sentence text backed by fake tokens that include a VERB.
+    long_body = "This section covers all the foundational concepts you need to understand before proceeding further."
+    body_tags = [
+        ("This", "DET"),
+        ("section", "NOUN"),
+        ("covers", "VERB"),
+    ]
+    body_sent = _make_fake_sent(long_body, [_make_fake_token(w, p) for w, p in body_tags])
+
+    # Second heading candidate, again a lone NOUN token.
+    outro_tokens = [_make_fake_token("Conclusion", "NOUN")]
+    outro_sent = _make_fake_sent("Conclusion", outro_tokens)
+
+    # Second body: another long sentence with a VERB among its fake tokens.
+    long_body2 = "This final section wraps up all the topics discussed and provides closing remarks for the reader."
+    body2_tags = [
+        ("This", "DET"),
+        ("section", "NOUN"),
+        ("wraps", "VERB"),
+    ]
+    body2_sent = _make_fake_sent(long_body2, [_make_fake_token(w, p) for w, p in body2_tags])
+
+    stub_nlp = _make_nlp_returning_doc([intro_sent, body_sent, outro_sent, body2_sent])
+
+    with patch("spacy.load", return_value=stub_nlp):
+        splitter = HierarchicalTextSplitter(
+            model="en_core_web_sm",
+            header_strategy="spacy_verbless",
+            max_chunk_size=2048,
+            chunk_overlap=0,
+            drop_verbless_sentences=False,
+        )
+
+    # The input string itself is arbitrary: the mocked nlp decides the sentences.
+    text = f"Introduction. {long_body} Conclusion. {long_body2}"
+    chunks = (await splitter.run(text)).chunks
+
+    # One section per verbless sentence is expected, indexed 0 and 1 in order.
+    assert len(chunks) == 2
+    first, second = chunks
+    assert (first.index, second.index) == (0, 1)
+
+
+# ---------------------------------------------------------------------------
+# Tests: edge cases
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_empty_text_returns_no_chunks() -> None:
+    """Running the splitter on "" yields a result with no chunks."""
+    splitter = HierarchicalTextSplitter(
+        header_strategy="markdown",
+        max_chunk_size=2048,
+        chunk_overlap=0,
+        drop_verbless_sentences=False,
+    )
+    output = await splitter.run("")
+    assert output.chunks == []
+
+
+@pytest.mark.asyncio
+async def test_whitespace_only_returns_no_chunks() -> None:
+    """Input made only of spaces, tabs and newlines yields no chunks."""
+    splitter = HierarchicalTextSplitter(
+        header_strategy="markdown",
+        max_chunk_size=2048,
+        chunk_overlap=0,
+        drop_verbless_sentences=False,
+    )
+    output = await splitter.run("   \n\t\n  ")
+    assert output.chunks == []
+
+
+def test_invalid_header_strategy_raises() -> None:
+    """Constructing the splitter with an unknown header_strategy raises ValueError."""
+    with pytest.raises(ValueError, match="header_strategy must be one of"):
+        HierarchicalTextSplitter(
+            header_strategy="unknown_strategy",
+            drop_verbless_sentences=False,
+            max_chunk_size=2048,
+            chunk_overlap=0,
+        )
+
+
+def test_chunk_overlap_ge_max_chunk_size_raises() -> None:
+    """A chunk_overlap equal to max_chunk_size (100 and 100) is rejected with ValueError."""
+    with pytest.raises(ValueError, match="chunk_overlap must be strictly less than max_chunk_size"):
+        HierarchicalTextSplitter(
+            header_strategy="markdown",
+            drop_verbless_sentences=False,
+            max_chunk_size=100,
+            chunk_overlap=100,
+        )
+
+
+def test_max_chunk_size_zero_raises() -> None:
+    """A max_chunk_size of 0 is rejected with ValueError at construction."""
+    with pytest.raises(ValueError, match="max_chunk_size must be strictly greater than 0"):
+        HierarchicalTextSplitter(
+            header_strategy="markdown",
+            drop_verbless_sentences=False,
+            max_chunk_size=0,
+            chunk_overlap=0,
+        )
diff --git a/tests/unit/experimental/components/text_splitters/test_hierarchical_splitter_integration.py b/tests/unit/experimental/components/text_splitters/test_hierarchical_splitter_integration.py
new file mode 100644
index 000000000..e2693880f
--- /dev/null
+++ b/tests/unit/experimental/components/text_splitters/test_hierarchical_splitter_integration.py
@@ -0,0 +1,99 @@
+#  Copyright (c) "Neo4j"
+#  Neo4j Sweden AB [https://neo4j.com]
+#  #
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#  #
+#      https://www.apache.org/licenses/LICENSE-2.0
+#  #
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""Integration tests for HierarchicalTextSplitter using the real en_core_web_sm model.
+
+All tests in this module are automatically skipped when either spaCy or the
+``en_core_web_sm`` model is not installed.  No mocking is performed — these
+tests exercise the full NLP pipeline end-to-end.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+spacy = pytest.importorskip("spacy")
+
+
+@pytest.fixture(scope="module")
+def nlp():  # type: ignore[return]
+    """Load en_core_web_sm once per module; skip the requesting test if it is missing."""
+    hint = "en_core_web_sm not installed — run: python -m spacy download en_core_web_sm"
+    try:
+        return spacy.load("en_core_web_sm")
+    except OSError:
+        pytest.skip(hint)
+
+
+@pytest.mark.asyncio
+async def test_markdown_split_real(nlp) -> None:  # noqa: ANN001
+    """Markdown strategy on a 3-section doc gives at least 2 chunks, indexed from 0 in order."""
+    from neo4j_graphrag.experimental.components.text_splitters.hierarchical_splitter import (
+        HierarchicalTextSplitter,
+    )
+
+    document = (
+        "# Introduction\n"
+        "This section introduces the topic and provides background information.\n\n"
+        "# Methods\n"
+        "This section describes the experimental methods used in the study.\n\n"
+        "# Conclusion\n"
+        "This section summarises the findings and suggests future work.\n"
+    )
+
+    splitter = HierarchicalTextSplitter(
+        drop_verbless_sentences=False,
+        chunk_overlap=20,
+        max_chunk_size=200,
+        header_strategy="markdown",
+    )
+    result = await splitter.run(document)
+
+    assert len(result.chunks) >= 2, (
+        f"Expected at least 2 chunks for a 3-section markdown doc, got {len(result.chunks)}"
+    )
+    # Every chunk's index has to equal its position in the returned list.
+    for i, chunk in enumerate(result.chunks):
+        assert chunk.index == i, f"chunk {i} has non-sequential index {chunk.index}"
+
+
+@pytest.mark.asyncio
+async def test_drop_verbless_sentences_real(nlp) -> None:  # noqa: ANN001
+    """With drop_verbless_sentences=True and the real model, the verbal sentence survives."""
+    from neo4j_graphrag.experimental.components.text_splitters.hierarchical_splitter import (
+        HierarchicalTextSplitter,
+    )
+
+    # Input: the one-word line "Overview", a blank line, then a sentence
+    # built around the verb "covers".
+    document = "Overview\n\nThis section covers the main concepts of the system in detail."
+
+    splitter = HierarchicalTextSplitter(
+        model="en_core_web_sm",
+        header_strategy="blank_line",
+        max_chunk_size=500,
+        chunk_overlap=0,
+        drop_verbless_sentences=True,
+    )
+    chunks = (await splitter.run(document)).chunks
+
+    # Filtering must not leave the result empty.
+    assert len(chunks) >= 1, "Expected at least one chunk after filtering"
+
+    # Only survival of the verbal sentence is asserted below.
+    # NOTE(review): removal of "Overview" is not checked — consider asserting it.
+    combined = " ".join(chunk.text for chunk in chunks)
+    assert "covers" in combined, (
+        "Expected the verbal sentence to survive verbless-sentence filtering"
+    )