Querying: Faceted (Aggregation) Search
When displaying a large amount of data, paging is often used to make viewing the data manageable. It's also useful to give some context of the entire data set and an easy way to drill down into particular categories. The common approach to doing this is a "faceted search", as shown in the image below. Note how the count of each category within the current search is shown across the top.
Let's start with defining a document like this:
/**
 * Document class describing a single camera listing.
 *
 * <p>A plain mutable bean: every property has a getter and a setter and no
 * validation is performed. The index and facet examples refer to these
 * properties by their field names.</p>
 */
public class Camera {

    private Date dateOfListing;
    private String model;
    private double cost;
    private int zoom;
    private double megapixels;
    private boolean imageStabilizer;
    private String manufacturer;

    /** @return the date on which the camera was listed */
    public Date getDateOfListing() {
        return dateOfListing;
    }

    /** @param dateOfListing the date on which the camera was listed */
    public void setDateOfListing(Date dateOfListing) {
        this.dateOfListing = dateOfListing;
    }

    /** @return the camera model name */
    public String getModel() {
        return model;
    }

    /** @param model the camera model name */
    public void setModel(String model) {
        this.model = model;
    }

    /** @return the cost of the camera */
    public double getCost() {
        return cost;
    }

    /** @param cost the cost of the camera */
    public void setCost(double cost) {
        this.cost = cost;
    }

    /** @return the zoom value */
    public int getZoom() {
        return zoom;
    }

    /** @param zoom the zoom value */
    public void setZoom(int zoom) {
        this.zoom = zoom;
    }

    /** @return the megapixel value */
    public double getMegapixels() {
        return megapixels;
    }

    /** @param megapixels the megapixel value */
    public void setMegapixels(double megapixels) {
        this.megapixels = megapixels;
    }

    /** @return the image stabilizer flag */
    public boolean isImageStabilizer() {
        return imageStabilizer;
    }

    /** @param imageStabilizer the image stabilizer flag */
    public void setImageStabilizer(boolean imageStabilizer) {
        this.imageStabilizer = imageStabilizer;
    }

    /** @return the manufacturer name */
    public String getManufacturer() {
        return manufacturer;
    }

    /** @param manufacturer the manufacturer name */
    public void setManufacturer(String manufacturer) {
        this.manufacturer = manufacturer;
    }
}
Step 1
Create an index to work against.
/**
 * Index definition over the {@code Cameras} collection.
 *
 * <p>The map projects the manufacturer, model, cost, dateOfListing and
 * megapixels fields of each camera document, which are the fields the
 * faceted queries filter and aggregate on.</p>
 */
public class Cameras_ByManufacturerModelCostDateOfListingAndMegapixels extends AbstractIndexCreationTask {

    /** Assigns the map expression of the index. */
    public Cameras_ByManufacturerModelCostDateOfListingAndMegapixels() {
        // The fragments are joined with an empty delimiter, so the resulting
        // map text is exactly the concatenation of the pieces below.
        this.map = String.join("",
            "from camera in docs.Cameras ",
            "select new {",
            " camera.manufacturer,",
            " camera.model,",
            " camera.cost,",
            " camera.dateOfListing,",
            " camera.megapixels",
            "} ");
    }
}
Step 2
Set up your facet definitions:
// Term facet: one count per unique value of the "manufacturer" field.
Facet facet1 = new Facet();
facet1.setFieldName("manufacturer");

// Range facet over "cost": one count per range expression.
RangeFacet facet2 = new RangeFacet();
facet2.setRanges(Arrays.asList(
    "cost <= 200",
    "cost between 200 and 400",
    "cost between 400 and 600",
    "cost between 600 and 800",
    "cost >= 800"
));

// Range facet over "megapixels".
// The first range uses "<=" so that it matches the RQL query and the
// "megapixels <= 3" bucket shown in the sample result on this page.
RangeFacet facet3 = new RangeFacet();
facet3.setRanges(Arrays.asList(
    "megapixels <= 3",
    "megapixels between 3 and 7",
    "megapixels between 7 and 10",
    "megapixels >= 10"
));

// Term facets and range facets are kept in separate lists.
List<Facet> facets = Arrays.asList(facet1);
List<RangeFacet> rangeFacets = Arrays.asList(facet2, facet3);
This tells RavenDB that you would like to get the following facets:
- For the manufacturer field, look at the documents and return a count for each unique term found.
- For the cost field, return the count of the following ranges (`between` is inclusive at both ends):
    - cost <= 200.0
    - 200.0 <= cost <= 400.0
    - 400.0 <= cost <= 600.0
    - 600.0 <= cost <= 800.0
    - cost >= 800.0
- For the megapixels field, return the count of the following ranges:
    - megapixels <= 3.0
    - 3.0 <= megapixels <= 7.0
    - 7.0 <= megapixels <= 10.0
    - megapixels >= 10.0
Step 3
You can write the following code to get back the data below:
// Runs the faceted query: restrict to cameras with cost between 100 and 300,
// then aggregate by the manufacturer term facet and both range facets.
// All three facets must be passed so that the result contains the
// "manufacturer", "cost" and "megapixels" entries that the equivalent RQL
// query selects; passing only the term-facet list would drop the ranges.
Map<String, FacetResult> facetResults = session
    .query(Camera.class, Cameras_ByManufacturerModelCostDateOfListingAndMegapixels.class)
    .whereBetween("cost", 100, 300)
    .aggregateBy(facet1)
    .andAggregateBy(facet2)
    .andAggregateBy(facet3)
    .execute();
// Term facet: one count per unique value of the "manufacturer" field.
Facet facet1 = new Facet();
facet1.setFieldName("manufacturer");

// Range facet over "cost": one count per range expression.
RangeFacet facet2 = new RangeFacet();
facet2.setRanges(Arrays.asList(
    "cost <= 200",
    "cost between 200 and 400",
    "cost between 400 and 600",
    "cost between 600 and 800",
    "cost >= 800"
));

// Range facet over "megapixels".
// The first range uses "<=" so that it matches the RQL query and the
// "megapixels <= 3" bucket shown in the sample result on this page.
RangeFacet facet3 = new RangeFacet();
facet3.setRanges(Arrays.asList(
    "megapixels <= 3",
    "megapixels between 3 and 7",
    "megapixels between 7 and 10",
    "megapixels >= 10"
));

// Term facets and range facets are kept in separate lists.
List<Facet> facets = Arrays.asList(facet1);
List<RangeFacet> rangeFacets = Arrays.asList(facet2, facet3);
from index 'Cameras/ByManufacturerModelCostDateOfListingAndMegapixels'
where cost between 100 and 300
select
    facet(manufacturer),
    facet(
        cost <= 200,
        cost between 200 and 400,
        cost between 400 and 600,
        cost between 600 and 800,
        cost >= 800
    ),
    facet(
        megapixels <= 3,
        megapixels between 3 and 7,
        megapixels between 7 and 10,
        megapixels >= 10
    )
This data represents the sample faceted data that satisfies the above query:
[
{
"Name": "manufacturer",
"Values": [
{
"Count": 1,
"Range": "canon"
},
{
"Count": 2,
"Range": "jessops"
},
{
"Count": 1,
"Range": "nikon"
},
{
"Count": 1,
"Range": "phillips"
},
{
"Count": 3,
"Range": "sony"
}
]
},
{
"Name": "cost",
"Values": [
{
"Count": 6,
"Range": "cost <= 200"
},
{
"Count": 2,
"Range": "cost between 200 and 400"
},
{
"Count": 0,
"Range": "cost between 400 and 600"
},
{
"Count": 0,
"Range": "cost between 600 and 800"
},
{
"Count": 0,
"Range": "cost >= 800"
}
]
},
{
"Name": "megapixels",
"Values": [
{
"Count": 0,
"Range": "megapixels <= 3"
},
{
"Count": 6,
"Range": "megapixels between 3 and 7"
},
{
"Count": 1,
"Range": "megapixels between 7 and 10"
},
{
"Count": 1,
"Range": "megapixels >= 10"
}
]
}
]
Storing Facets
If you do not have to change your facets dynamically, you can store your facets as a FacetSetup
document and pass the document ID instead of the list each time:
// Bundle the term facets and range facets into a single setup document.
FacetSetup facetSetup = new FacetSetup();
facetSetup.setFacets(facets);
facetSetup.setRangeFacets(rangeFacets);

// Store the setup under a fixed ID so queries can refer to it by that ID.
session.store(facetSetup, "facets/CameraFacets");
// store() only registers the document with the session; it has to be saved
// before a query can reference "facets/CameraFacets" on the server.
session.saveChanges();
// ID of the stored FacetSetup document holding the facet definitions.
String facetSetupId = "facets/CameraFacets";

// Same filter as before (cost between 100 and 300), but the facets are
// resolved from the stored setup document instead of being passed inline.
Map<String, FacetResult> facetResults = session
    .query(Camera.class, Cameras_ByManufacturerModelCostDateOfListingAndMegapixels.class)
    .whereBetween("cost", 100, 300)
    .aggregateUsing(facetSetupId)
    .execute();
// Term facet: one count per unique value of the "manufacturer" field.
Facet facet1 = new Facet();
facet1.setFieldName("manufacturer");

// Range facet over "cost": one count per range expression.
RangeFacet facet2 = new RangeFacet();
facet2.setRanges(Arrays.asList(
    "cost <= 200",
    "cost between 200 and 400",
    "cost between 400 and 600",
    "cost between 600 and 800",
    "cost >= 800"
));

// Range facet over "megapixels".
// The first range uses "<=" so that it matches the "megapixels <= 3.0"
// range listed in the facet description on this page.
RangeFacet facet3 = new RangeFacet();
facet3.setRanges(Arrays.asList(
    "megapixels <= 3",
    "megapixels between 3 and 7",
    "megapixels between 7 and 10",
    "megapixels >= 10"
));

// Term facets and range facets are kept in separate lists.
List<Facet> facets = Arrays.asList(facet1);
List<RangeFacet> rangeFacets = Arrays.asList(facet2, facet3);
from index 'Cameras/ByManufacturerModelCostDateOfListingAndMegapixels'
    where cost between 100 and 300
    select facet(id('facets/CameraFacets'))
Stale Results
The faceted search does not take into account the staleness of an index. You can wait for non-stale results by customizing your query with the waitForNonStaleResults
method.
Fluent API
As an alternative to creating a list of facets and passing it to the aggregateBy
method, RavenDB also exposes a dynamic API where you can create your facets using a builder. You can read more about those methods in our dedicated Client API article here.