Microsoft GraphRAG

Microsoft GraphRAG is a Python library for building knowledge graphs from unstructured text and using them for retrieval-augmented generation. It combines graph-based indexing with vector search to improve the quality and relevance of LLM responses.

Qdrant can be used as a custom vector store backend for GraphRAG, enabling you to leverage Qdrant’s performance and scalability for storing and searching document embeddings.

Installation

Install the required packages:

pip install graphrag qdrant-client

Custom Vector Store Implementation

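GraphRAG allows you to register custom vector stores by extending the VectorStore base class. The example below uses a placeholder cluster URL and API key; replace them with the values for your own Qdrant instance: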

import uuid

from graphrag_vectors import (
    VectorStore,
    VectorStoreDocument,
    VectorStoreSearchResult,
)
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams


class QdrantVectorStore(VectorStore):
    """GraphRAG vector store that keeps document embeddings in a Qdrant collection.

    The collection is named after ``self.index_name`` (set by the base class).
    Each document is stored as one point whose payload carries the original
    GraphRAG document id under the ``"_original_id"`` key.
    """

    # Placeholder connection settings; override them with the ``url`` and
    # ``api_key`` keyword arguments.
    _DEFAULT_URL = "https://xyz-example.eu-central.aws.cloud.qdrant.io:6333"
    _DEFAULT_API_KEY = "<your-api-key>"

    def __init__(self, **kwargs):
        """Create the Qdrant client and read the store settings from ``kwargs``.

        Recognised keyword arguments (all optional; every keyword is also
        forwarded to the base class unchanged):

        * ``url`` -- Qdrant endpoint; falls back to the placeholder URL.
        * ``api_key`` -- Qdrant API key; falls back to the placeholder key.
        * ``vector_size`` -- embedding dimensionality, 384 by default.
        """
        super().__init__(**kwargs)
        # ``or`` (rather than a ``get`` default) so that an explicit ``None``
        # also falls back to the placeholder value.
        self.client = QdrantClient(
            url=kwargs.get("url") or self._DEFAULT_URL,
            api_key=kwargs.get("api_key") or self._DEFAULT_API_KEY,
        )
        self.collection_name = self.index_name
        self.vector_size = kwargs.get("vector_size", 384)

    def create_index(self, **kwargs):
        """Create the backing collection (cosine distance) if it is missing.

        Calling this when the collection already exists is a no-op, so the
        method is safe to run on every start-up.
        """
        if self.client.collection_exists(self.collection_name):
            return
        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=self.vector_size, distance=Distance.COSINE
            ),
        )

    def load_documents(
        self, documents: list[VectorStoreDocument], overwrite: bool = False
    ):
        """Upsert ``documents`` into the collection.

        Documents without a vector are skipped. When ``overwrite`` is true the
        collection is dropped and recreated first, so only ``documents``
        remain afterwards.
        """
        if overwrite:
            self.client.delete_collection(collection_name=self.collection_name)
            self.create_index()

        points = [
            PointStruct(
                # Qdrant point ids must be unsigned integers or UUIDs, so the
                # GraphRAG id is mapped to a UUID. uuid5 is deterministic:
                # reloading the same document replaces its point instead of
                # adding a duplicate under a fresh random id.
                id=str(uuid.uuid5(uuid.NAMESPACE_URL, str(doc.id))),
                vector=doc.vector,
                payload={"_original_id": doc.id},
            )
            for doc in documents
            if doc.vector
        ]
        if not points:
            # Nothing to store; avoid sending an empty update request.
            return
        self.client.upsert(collection_name=self.collection_name, points=points)

    def similarity_search_by_vector(
        self, query_embedding: list[float], k: int = 10, **kwargs
    ):
        """Return up to ``k`` stored documents closest to ``query_embedding``.

        Each result wraps a ``VectorStoreDocument`` (original id and stored
        vector) together with the similarity score reported by Qdrant.
        """
        hits = self.client.query_points(
            collection_name=self.collection_name,
            query=query_embedding,
            limit=k,
            # Vectors are not returned by default; without this flag
            # ``hit.vector`` would be None in every result.
            with_vectors=True,
        ).points

        return [
            VectorStoreSearchResult(
                document=VectorStoreDocument(
                    id=hit.payload["_original_id"], vector=hit.vector
                ),
                score=hit.score,
            )
            for hit in hits
        ]

    # ...other graphrag_vectors.VectorStore methods

Usage

Register and use the custom Qdrant vector store:

from graphrag_vectors import (
    register_vector_store,
    create_vector_store,
    VectorStoreConfig,
    IndexSchema,
)

# Register QdrantVectorStore under the type name "qdrant"
register_vector_store("qdrant", QdrantVectorStore)

# Build the store for the "my_collection" index
schema = IndexSchema(index_name="my_collection")
store_config = VectorStoreConfig(type="qdrant", vector_size=1536)
vector_store = create_vector_store(store_config, schema)

# Connect, then make sure the index exists
vector_store.connect()
vector_store.create_index()

# Load documents (`...` stands for the remaining vector components)
documents = [
    VectorStoreDocument(id="doc_1", vector=[0.1, 0.2, ...]),
    VectorStoreDocument(id="doc_2", vector=[0.3, 0.4, ...]),
]
vector_store.load_documents(documents)

# Search for the 5 documents closest to a query embedding
query_embedding = [0.5, 0.6, ...]
results = vector_store.similarity_search_by_vector(query_embedding, k=5)

Further Reading

Was this page useful?

Thank you for your feedback! 🙏

We are sorry to hear that. 😔 You can edit this page on GitHub, or create a GitHub issue.