Paging Query Results


  • Paging:
    Paging is the process of fetching a subset (a page) of results from a dataset, rather than retrieving the entire results at once. This method enables processing query results one page at a time.

  • Default page size:

    • Querying Lucene indexes:
      If the client's query definition does Not explicitly specify the page size, the server will default to a C# int.MaxValue (2,147,483,647). In such a case, all results will be returned in a single server call.

    • Querying Corax indexes:
      The default page size is the same as the one employed by Lucene.
      Note: when using Corax as the search engine, indexes with more than a C# int.MaxValue entries can be created and used.
      To match this capacity, queries over Corax indexes can skip a number of results that exceed this max value and take documents from that location.

  • Performance:
    Using paging is beneficial when handling large result datasets, contributing to improved performance.
    See paging and performance here below.

  • In this page:


No-paging example

// A simple query without paging:
// ==============================

/** @var array<Product> $allResults */
$allResults = $session
    ->query(Products_ByUnitsInStock_IndexEntry::class, Products_ByUnitsInStock::class)
    ->whereGreaterThan("UnitsInStock", 10)
    ->ofType(Product::class)
    ->toList();

// Executing the query on the Northwind sample data
// will result in all 47 Product documents that match the query predicate.
// A simple DocumentQuery without paging:
// ======================================

/** @var array<Product> $allResults */
$allResults = $session->advanced()
    ->documentQuery(Products_ByUnitsInStock_IndexEntry::class, Products_ByUnitsInStock::class)
    ->whereGreaterThan("UnitsInStock", 10)
    ->ofType(Product::class)
    ->toList();

// Executing the query on the Northwind sample data
// will result in all 47 Product documents that match the query predicate.
class Products_ByUnitsInStock_IndexEntry
{
    private ?int $unitsInStock = null;

    public function getUnitsInStock(): ?int
    {
        return $this->unitsInStock;
    }

    public function setUnitsInStock(?int $unitsInStock): void
    {
        $this->unitsInStock = $unitsInStock;
    }
}

class Products_ByUnitsInStock extends AbstractIndexCreationTask
{
    public function __construct()
    {
        parent::__construct();

        $this->map = "docs.Products.Select(product => new {" .
            "    UnitsInStock = product.UnitsInStock" .
            "})";
    }
}
from index "Products/ByUnitsInStock"
where UnitsInStock > 10

Paging examples

Retrieve a specific page:

// Retrieve only the 3'rd page - when page size is 10:
// ===================================================

$stats = new QueryStatistics();

/** @var array<Product> $thirdPageResults */
$thirdPageResults = $session
    ->query(Products_ByUnitsInStock_IndexEntry::class, Products_ByUnitsInStock::class)
     // Get the query stats if you wish to know the TOTAL number of results
    ->statistics($stats )
     // Apply some filtering condition as needed
    ->whereGreaterThan("UnitsInStock", 10)
    ->ofType(Product::class)
     // Call 'Skip', pass the number of items to skip from the beginning of the result set
     // Skip the first 20 resulting documents
    ->skip(20)
     // Call 'Take' to define the number of documents to return
     // Take up to 10 products => so 10 is the "Page Size"
    ->take(10)
    ->toList();

// When executing this query on the Northwind sample data,
// results will include only 10 Product documents ("products/45-A" to "products/54-A")

$totalResults = $stats->getTotalResults();

// While the query returns only 10 results,
// `totalResults` will hold the total number of matching documents (47).
// Retrieve only the 3'rd page - when page size is 10:
// ===================================================

$stats = new QueryStatistics();

/** @var array<Product> $thirdPageResults */
$thirdPageResults = $session->advanced()
    ->documentQuery(Products_ByUnitsInStock_IndexEntry::class, Products_ByUnitsInStock::class)
     // Get the query stats if you wish to know the TOTAL number of results
    ->statistics($stats)
     // Apply some filtering condition as needed
    ->whereGreaterThan("UnitsInStock", 10)
    ->ofType(Product::class)
     // Call 'Skip', pass the number of items to skip from the beginning of the result set
     // Skip the first 20 resulting documents
    ->skip(20)
     // Call 'Take' to define the number of documents to return
     // Take up to 10 products => so 10 is the "Page Size"
    ->take(10)
    ->toList();

// When executing this query on the Northwind sample data,
// results will include only 10 Product documents ("products/45-A" to "products/54-A")

$totalResults = $stats->getTotalResults();

// While the query returns only 10 results,
// `totalResults` will hold the total number of matching documents (47).
class Products_ByUnitsInStock_IndexEntry
{
    private ?int $unitsInStock = null;

    public function getUnitsInStock(): ?int
    {
        return $this->unitsInStock;
    }

    public function setUnitsInStock(?int $unitsInStock): void
    {
        $this->unitsInStock = $unitsInStock;
    }
}

class Products_ByUnitsInStock extends AbstractIndexCreationTask
{
    public function __construct()
    {
        parent::__construct();

        $this->map = "docs.Products.Select(product => new {" .
            "    UnitsInStock = product.UnitsInStock" .
            "})";
    }
}
from index "Products/ByUnitsInStock"
where UnitsInStock > 10 
limit 20, 10 // skip 20, take 10

Page through all results:

// Query for all results - page by page:
// =====================================

$pagedResults = null;
$pageNumber = 0;
$pageSize = 10;

do
{
    $pagedResults = $session
        ->query(Products_ByUnitsInStock_IndexEntry::class, Products_ByUnitsInStock::class)
         // Apply some filtering condition as needed
        ->whereGreaterThan("UnitsInStock", 10)
        ->ofType(Product::class)
         // Skip the number of results that were already fetched
        ->skip($pageNumber * $pageSize)
         // Request to get 'pageSize' results
        ->take($pageSize)
        ->toList();

    $pageNumber++;

    // Make any processing needed with the current paged results here
    // ...
}
while (count($pagedResults) > 0); // Fetch next results
// Query for all results - page by page:
// =====================================

$pagedResults = null;
$pageNumber = 0;
$pageSize = 10;

do
{
    $pagedResults = $session->advanced()
        ->documentQuery(Products_ByUnitsInStock_IndexEntry::class, Products_ByUnitsInStock::class)
        // Apply some filtering condition as needed
        ->whereGreaterThan("UnitsInStock", 10)
        ->ofType(Product::class)
        // Skip the number of results that were already fetched
        ->skip($pageNumber * $pageSize)
        // Request to get 'pageSize' results
        ->take($pageSize)
        ->toList();

    $pageNumber++;

    // Make any processing needed with the current paged results here
    // ...
}
while (count($pagedResults) > 0); // Fetch next results
class Products_ByUnitsInStock_IndexEntry
{
    private ?int $unitsInStock = null;

    public function getUnitsInStock(): ?int
    {
        return $this->unitsInStock;
    }

    public function setUnitsInStock(?int $unitsInStock): void
    {
        $this->unitsInStock = $unitsInStock;
    }
}

class Products_ByUnitsInStock extends AbstractIndexCreationTask
{
    public function __construct()
    {
        parent::__construct();

        $this->map = "docs.Products.Select(product => new {" .
            "    UnitsInStock = product.UnitsInStock" .
            "})";
    }
}
from index "Products/ByUnitsInStock"
where UnitsInStock > 10
limit 0, 10 // First loop will skip 0, take 10  

// The next loops in the code will each generate the above RQL with an increased 'skip' value:
// limit 10, 10
// limit 20, 10
// limit 30, 10
// ...

Paging and performance

Better performance:

It is recommended to explicitly set a page size when making a query that is expected to generate a significant number of results. This practice has several benefits:

  • Optimizes bandwidth usage by reducing data transfer between the server and client.
  • Prevents delays in response times caused by sending too much data over the network.
  • Avoids high memory consumption when dealing with numerous documents.
  • Ensures a more manageable user experience by not overwhelming users with massive datasets at once.

Performance hints:

  • By default, if the number of returned results exceeds 2048, the server will issue a "Page size too big" notification (visible in the Studio) with information about the query.

  • This threshold can be customized by modifying the value of the PerformanceHints.MaxNumberOfResults configuration key.

  • As suggested by this performance hint, you may consider using Streaming query results instead of paging.

    Figure 1. Performance Hint

    Performance Hint

Paging through tampered results

  • The QueryStatistics object contains the totalResults property,
    which represents the total number of matching documents found in the query results.

  • The QueryStatistics object also contains the skippedResults property.
    Whenever this property is greater than 0, that implies the server has skipped that number of results from the index.

  • The server will skip duplicate results internally in the following two scenarios:

    1. When making a Projection query with Distinct.

    2. When querying a Fanout index.

  • In these cases:

    • The skippedResults property from the stats object will hold the count of skipped (duplicate) results.

    • The totalResults property will be invalidated -
      it will Not deduct the number of skipped results from the total number of results.

  • To do proper paging in these scenarios:
    include the skippedResults value when specifying the number of documents to skip for each page using:
    (currentPage * pageSize) + skippedResults.

Examples

A projection query with Distinct:

$pagedResults = null;

$totalResults = 0;
$totalUniqueResults = 0;
$skippedResults = 0;

$pageNumber = 0;
$pageSize = 10;

do
{
    $pagedResults = $session
            ->query(Products_ByUnitsInStock_IndexEntry::class, Products_ByUnitsInStock::class)
            ->statistics($stats)
            ->whereGreaterThan("UnitsInStock", 10)
            ->ofType(Product::class)
            // Define a projection
            ->selectFields(ProjectedClass::class)
             // Call Distinct to remove duplicate projected results
            ->distinct()
             // Add the number of skipped results to the "start location"
            ->skip(($pageNumber * $pageSize) + $skippedResults)
             // Define how many items to return
            ->take($pageSize)
            ->toList();

    $totalResults = $stats->getTotalResults();        // Number of total matching documents (includes duplicates)
    $skippedResults += $stats->getSkippedResults();   // Number of duplicate results that were skipped
    $totalUniqueResults += count($pagedResults); // Number of unique results returned in this server call

    $pageNumber++;
}
while (count($pagedResults) > 0); // Fetch next results

// When executing the query on the Northwind sample data:
// ======================================================

// The total matching results reported in the stats is 47 (totalResults),
// but the total unique objects returned while paging the results is only 29 (totalUniqueResults)
// due to the 'Distinct' usage which removes duplicates.

// This is solved by adding the skipped results count to Skip().
$pagedResults = null;

$totalResults = 0;
$totalUniqueResults = 0;
$skippedResults = 0;

$pageNumber = 0;
$pageSize = 10;

do
{
    $pagedResults = $session->advanced()
        ->documentQuery(Products_ByUnitsInStock_IndexEntry::class, Products_ByUnitsInStock::class)
        ->statistics($stats)
        ->whereGreaterThan("UnitsInStock", 10)
        ->ofType(Product::class)
         // Define a projection
        ->selectFields(ProjectedClass::class)
         // Call Distinct to remove duplicate projected results
        ->distinct()
         // Add the number of skipped results to the "start location"
        ->skip(($pageNumber * $pageSize) + $skippedResults)
        ->take($pageSize)
        ->toList();

    $totalResults = $stats->getTotalResults();        // Number of total matching documents (includes duplicates)
    $skippedResults += $stats->getSkippedResults();   // Number of duplicate results that were skipped
    $totalUniqueResults += count($pagedResults);      // Number of unique results returned in this server call

    $pageNumber++;
}
while (count($pagedResults) > 0); // Fetch next results

// When executing the query on the Northwind sample data:
// ======================================================

// The total matching results reported in the stats is 47 (totalResults),
// but the total unique objects returned while paging the results is only 29 (totalUniqueResults)
// due to the 'Distinct' usage which removes duplicates.

// This is solved by adding the skipped results count to Skip().
class Products_ByUnitsInStock_IndexEntry
{
    private ?int $unitsInStock = null;

    public function getUnitsInStock(): ?int
    {
        return $this->unitsInStock;
    }

    public function setUnitsInStock(?int $unitsInStock): void
    {
        $this->unitsInStock = $unitsInStock;
    }
}

class Products_ByUnitsInStock extends AbstractIndexCreationTask
{
    public function __construct()
    {
        parent::__construct();

        $this->map = "docs.Products.Select(product => new {" .
            "    UnitsInStock = product.UnitsInStock" .
            "})";
    }
}
class ProjectedClass
{
    public ?string $category = null;
    public ?string $supplier = null;

    public function getCategory(): ?string
    {
        return $this->category;
    }

    public function setCategory(?string $category): void
    {
        $this->category = $category;
    }

    public function getSupplier(): ?string
    {
        return $this->supplier;
    }

    public function setSupplier(?string $supplier): void
    {
        $this->supplier = $supplier;
    }
}
from index "Products/ByUnitsInStock"
where UnitsInStock > 10
select distinct Category, Supplier
limit 0, 10  // First loop will skip 0, take 10, etc.

Querying a Fanout index:

$pagedResults = null;

$totalResults = 0;
$totalUniqueResults = 0;
$skippedResults = 0;

$pageNumber = 0;
$pageSize = 50;

do
{
    $pagedResults = $session
        ->query(Orders_ByProductName_IndexEntry::class, Orders_ByProductName::class)
        ->statistics($stats)
        ->ofType(Order::class)
         // Add the number of skipped results to the "start location"
        ->skip(($pageNumber * $pageSize) + $skippedResults)
        ->take($pageSize)
        ->toList();

    $totalResults = $stats->getTotalResults();
    $skippedResults += $stats->getSkippedResults();
    $totalUniqueResults += count($pagedResults);

    $pageNumber++;
}
while (count($pagedResults) > 0); // Fetch next results

// When executing the query on the Northwind sample data:
// ======================================================

// The total results reported in the stats is 2155 (totalResults),
// which represent the multiple index-entries generated as defined by the fanout index.

// By adding the skipped results count to the Skip() method,
// we get the correct total unique results which is 830 Order documents.
$pagedResults = null;

$totalResults = 0;
$totalUniqueResults = 0;
$skippedResults = 0;

$pageNumber = 0;
$pageSize = 50;

do
{
    $pagedResults = $session->advanced()
        ->documentQuery(Orders_ByProductName_IndexEntry::class, Orders_ByProductName::class)
        ->statistics($stats)
        ->ofType(Order::class)
         // Add the number of skipped results to the "start location"
        ->skip(($pageNumber * $pageSize) + $skippedResults)
        ->take($pageSize)
        ->toList();

    $totalResults = $stats->getTotalResults();
    $skippedResults += $stats->getSkippedResults();
    $totalUniqueResults += count($pagedResults);

    $pageNumber++;
}
while (count($pagedResults) > 0); // Fetch next results

// When executing the query on the Northwind sample data:
// ======================================================

// The total results reported in the stats is 2155 (totalResults),
// which represent the multiple index-entries generated as defined by the fanout index.

// By adding the skipped results count to the Skip() method,
// we get the correct total unique results which is 830 Order documents.
// A fanout index - creating MULTIPLE index-entries per document:
// ==============================================================

class Orders_ByProductName_IndexEntry
{
    private ?string $productName = null;

    public function getProductName(): ?string
    {
        return $this->productName;
    }

    public function setProductName(?string $productName): void
    {
        $this->productName = $productName;
    }
}
class Orders_ByProductName extends AbstractIndexCreationTask
{
    public function __construct()
    {
        parent::__construct();

        $this->map = "docs.Orders.SelectMany(order => order.Lines, (order, line) => new {" .
                "    Product = line.ProductName " .
                "})";
    }
}
from index "Orders/ByProductName"
limit 0, 50  // First loop will skip 0, take 50, etc.