see on GitHub

Faceted Search

When displaying a large amount of data, paging is often used to make viewing the data manageable. However, it's also useful to give some context about the entire data set and an easy way to drill down into particular categories. The common approach to doing this is "faceted search", as shown in the image below. Note how the count of each category within the current search is shown across the top.

Facets


To achieve this in RavenDB, let's say you have a document like this:

/**
 * Sample document type used by the faceted search examples: one camera listing.
 *
 * <p>This is a plain mutable bean. Every field is exposed through a getter and a
 * setter, and the setters store the given value as-is without validation or
 * copying.
 */
public class Camera {

  // ---- listing data ----

  private int id;

  private Date dateOfListing;
  private String manufacturer;
  private String model;
  /** Boxed on purpose in the original sample, so it is {@code null} until set. */
  private Double cost;

  // ---- technical specification ----

  private int zoom;
  private double megapixels;
  private boolean imageStabilizer;
  private List<String> advancedFeatures;

  /** @return the current value of {@code id} */
  public int getId() {
    return this.id;
  }

  /** @param id the new value of {@code id} */
  public void setId(int id) {
    this.id = id;
  }

  /** @return the stored listing date (same instance that was set), or {@code null} if unset */
  public Date getDateOfListing() {
    return this.dateOfListing;
  }

  /** @param dateOfListing the listing date; the reference is stored without copying */
  public void setDateOfListing(Date dateOfListing) {
    this.dateOfListing = dateOfListing;
  }

  /** @return the manufacturer name, or {@code null} if unset */
  public String getManufacturer() {
    return this.manufacturer;
  }

  /** @param manufacturer the manufacturer name */
  public void setManufacturer(String manufacturer) {
    this.manufacturer = manufacturer;
  }

  /** @return the model name, or {@code null} if unset */
  public String getModel() {
    return this.model;
  }

  /** @param model the model name */
  public void setModel(String model) {
    this.model = model;
  }

  /** @return the cost, or {@code null} if unset */
  public Double getCost() {
    return this.cost;
  }

  /** @param cost the cost; may be {@code null} */
  public void setCost(Double cost) {
    this.cost = cost;
  }

  /** @return the current value of {@code zoom} */
  public int getZoom() {
    return this.zoom;
  }

  /** @param zoom the new value of {@code zoom} */
  public void setZoom(int zoom) {
    this.zoom = zoom;
  }

  /** @return the current value of {@code megapixels} */
  public double getMegapixels() {
    return this.megapixels;
  }

  /** @param megapixels the new value of {@code megapixels} */
  public void setMegapixels(double megapixels) {
    this.megapixels = megapixels;
  }

  /** @return the current value of the {@code imageStabilizer} flag */
  public boolean isImageStabilizer() {
    return this.imageStabilizer;
  }

  /** @param imageStabilizer the new value of the {@code imageStabilizer} flag */
  public void setImageStabilizer(boolean imageStabilizer) {
    this.imageStabilizer = imageStabilizer;
  }

  /** @return the stored feature list (same instance that was set), or {@code null} if unset */
  public List<String> getAdvancedFeatures() {
    return this.advancedFeatures;
  }

  /** @param advancedFeatures the feature list; the reference is stored without copying */
  public void setAdvancedFeatures(List<String> advancedFeatures) {
    this.advancedFeatures = advancedFeatures;
  }
}

Step 1

Create an index to work against. This can be set up like so:

/**
 * Index definition used by the faceted search examples.
 *
 * <p>The map function selects the Manufacturer, Model, Cost, DateOfListing and
 * Megapixels fields of every document in {@code docs.Cameras}.
 */
public static class Cameras_ByManufacturerModelCostDateOfListingAndMegapixels extends AbstractIndexCreationTask {

  /** Text of the map function, assigned to {@code map} by the constructor. */
  private static final String MAP_FUNCTION =
      " from camera in docs.Cameras  "
      + " select new                   "
      + " {                            "
      + "     camera.Manufacturer,     "
      + "     camera.Model,            "
      + "     camera.Cost,             "
      + "     camera.DateOfListing,    "
      + "     camera.Megapixels        "
      + " }; ";

  /** Creates the index task with its map function set. */
  public Cameras_ByManufacturerModelCostDateOfListingAndMegapixels() {
    map = MAP_FUNCTION;
  }
}

Step 2

Next, you need to set up your facet definitions:

// Query-type handle used to refer to Camera properties in the facet definitions.
QCamera c = QCamera.camera;

// Facet on the manufacturer property; no ranges are configured for it.
Facet manufacturerFacet = new Facet();
manufacturerFacet.setName(c.manufacturer);

// Facet on the cost property: five buckets split at 200, 400, 600 and 800.
Facet costFacet = new Facet();
costFacet.setName(c.cost);
costFacet.setRanges(
    c.cost.lt(200),
    c.cost.gt(200).and(c.cost.lt(400)),
    c.cost.gt(400).and(c.cost.lt(600)),
    c.cost.gt(600).and(c.cost.lt(800)),
    c.cost.gt(800));

// Facet on the megapixels property: four buckets split at 3, 7 and 10.
Facet megapixelsFacet = new Facet();
megapixelsFacet.setName(c.megapixels);
megapixelsFacet.setRanges(
    c.megapixels.lt(3),
    c.megapixels.gt(3).and(c.megapixels.lt(7)),
    c.megapixels.gt(7).and(c.megapixels.lt(10)),
    c.megapixels.gt(10));

// Collect the definitions in the same order: manufacturer, cost, megapixels.
List<Facet> facets = new ArrayList<>();
facets.add(manufacturerFacet);
facets.add(costFacet);
facets.add(megapixelsFacet);

This tells RavenDB that you would like to get the following facets:

  • For the Manufacturer field look at the documents and return a count for each unique Term found
  • For the Cost field, return the count of the following ranges:
    • Cost <= 200.0
    • 200.0 <= Cost <= 400.0
    • 400.0 <= Cost <= 600.0
    • 600.0 <= Cost <= 800.0
    • Cost >= 800.0
  • For the Megapixels field, return the count of the following ranges:
    • Megapixels <= 3.0
    • 3.0 <= Megapixels <= 7.0
    • 7.0 <= Megapixels <= 10.0
    • Megapixels >= 10.0

Step 3

Finally, you can write the following code to get back the data shown below:

// Session query variant: filters the index on cost between 100 and 300 (goe/loe)
// and passes the `facets` definitions to toFacets, which yields FacetResults.
QCamera c = QCamera.camera;
FacetResults facetResults = session
  .query(Camera.class, Cameras_ByManufacturerModelCostDateOfListingAndMegapixels.class)
  .where(c.cost.goe(100).and(c.cost.loe(300)))
  .toFacets(facets);
// DocumentQuery variant: goes through session.advanced(), filters the index with
// whereBetweenOrEqual on cost (100.0 to 300.0) and passes the `facets`
// definitions to toFacets, which yields FacetResults.
QCamera c = QCamera.camera;
FacetResults facetResults = session
  .advanced()
  .documentQuery(Camera.class, Cameras_ByManufacturerModelCostDateOfListingAndMegapixels.class)
  .whereBetweenOrEqual(c.cost, 100.0, 300.0)
  .toFacets(facets);
// Commands variant: calls getFacets on the store's database commands with the
// index name, a raw IndexQuery string and the `facets` definitions.
// NOTE(review): the "Dx" prefix presumably marks double-typed range values — confirm.
FacetResults facetResults = store
  .getDatabaseCommands()
  .getFacets("Cameras/ByManufacturerModelCostDateOfListingAndMegapixels",
    new IndexQuery("Cost_Range:[Dx100 TO Dx300]"),
    facets);
// Query-type handle used to refer to Camera properties in the facet definitions.
QCamera c = QCamera.camera;

// Facet on the manufacturer property; no ranges are configured for it.
Facet manufacturerFacet = new Facet();
manufacturerFacet.setName(c.manufacturer);

// Facet on the cost property: five buckets split at 200, 400, 600 and 800.
Facet costFacet = new Facet();
costFacet.setName(c.cost);
costFacet.setRanges(
    c.cost.lt(200),
    c.cost.gt(200).and(c.cost.lt(400)),
    c.cost.gt(400).and(c.cost.lt(600)),
    c.cost.gt(600).and(c.cost.lt(800)),
    c.cost.gt(800));

// Facet on the megapixels property: four buckets split at 3, 7 and 10.
Facet megapixelsFacet = new Facet();
megapixelsFacet.setName(c.megapixels);
megapixelsFacet.setRanges(
    c.megapixels.lt(3),
    c.megapixels.gt(3).and(c.megapixels.lt(7)),
    c.megapixels.gt(7).and(c.megapixels.lt(10)),
    c.megapixels.gt(10));

// Collect the definitions in the same order: manufacturer, cost, megapixels.
List<Facet> facets = new ArrayList<>();
facets.add(manufacturerFacet);
facets.add(costFacet);
facets.add(megapixelsFacet);

The data below represents sample faceted data that satisfies the above query:

{
   Manufacturer: [
      {
         Range: 'canon',
         Count: 42
      },
      {
         Range: 'jessops',
         Count: 50
      },
      {
         Range: 'nikon',
         Count: 46
      },
      {
         Range: 'phillips',
         Count: 44
      },
      {
         Range: 'sony',
         Count: 35
      }
   ],
   Cost_Range: [
      {
         Range: '[NULL TO Dx200.0]',
         Count: 115
      },
      {
         Range: '[Dx200.0 TO Dx400.0]',
         Count: 102
      }
   ],
   Megapixels_Range: [
      {
         Range: '[NULL TO Dx3.0]',
         Count: 42
      },
      {
         Range: '[Dx3.0 TO Dx7.0]',
         Count: 79
      },
      {
         Range: '[Dx7.0 TO Dx10.0]',
         Count: 82
      },
      {
         Range: '[Dx10.0 TO NULL]',
         Count: 14
      }
   ]
}

Storing facets

Alternatively, if you do not have to change your facets dynamically, you can store your facets as a FacetSetup document and pass the document ID instead of the list each time:

// Stores the facet definitions in the session as a FacetSetup document with the
// id "facets/CameraFacets", so queries can refer to them by that id.
// NOTE(review): presumably the session's changes must be saved before the id can
// be resolved by a query — confirm against the session API.
session.store(new FacetSetup("facets/CameraFacets", facets));

// Session query variant: filters the index on cost between 100 and 300 (goe/loe)
// and passes the FacetSetup document id to toFacets instead of a facet list.
QCamera c = QCamera.camera;
FacetResults facetResults = session
  .query(Camera.class, Cameras_ByManufacturerModelCostDateOfListingAndMegapixels.class)
  .where(c.cost.goe(100).and(c.cost.loe(300)))
  .toFacets("facets/CameraFacets");
// DocumentQuery variant: goes through session.advanced(), filters the index with
// whereBetweenOrEqual on cost (100.0 to 300.0) and passes the FacetSetup
// document id to toFacets instead of a facet list.
QCamera c = QCamera.camera;
FacetResults facetResults = session
  .advanced()
  .documentQuery(Camera.class, Cameras_ByManufacturerModelCostDateOfListingAndMegapixels.class)
  .whereBetweenOrEqual(c.cost, 100.0, 300.0)
  .toFacets("facets/CameraFacets");
// Commands variant: calls getFacets on the store's database commands with the
// index name, a raw IndexQuery string and the FacetSetup document id.
// NOTE(review): the "Dx" prefix presumably marks double-typed range values — confirm.
FacetResults facetResults = store
  .getDatabaseCommands()
  .getFacets("Cameras/ByManufacturerModelCostDateOfListingAndMegapixels",
    new IndexQuery("Cost_Range:[Dx100 TO Dx300]"),
    "facets/CameraFacets");
// Query-type handle used to refer to Camera properties in the facet definitions.
QCamera c = QCamera.camera;

// Facet on the manufacturer property; no ranges are configured for it.
Facet manufacturerFacet = new Facet();
manufacturerFacet.setName(c.manufacturer);

// Facet on the cost property: five buckets split at 200, 400, 600 and 800.
Facet costFacet = new Facet();
costFacet.setName(c.cost);
costFacet.setRanges(
    c.cost.lt(200),
    c.cost.gt(200).and(c.cost.lt(400)),
    c.cost.gt(400).and(c.cost.lt(600)),
    c.cost.gt(600).and(c.cost.lt(800)),
    c.cost.gt(800));

// Facet on the megapixels property: four buckets split at 3, 7 and 10.
Facet megapixelsFacet = new Facet();
megapixelsFacet.setName(c.megapixels);
megapixelsFacet.setRanges(
    c.megapixels.lt(3),
    c.megapixels.gt(3).and(c.megapixels.lt(7)),
    c.megapixels.gt(7).and(c.megapixels.lt(10)),
    c.megapixels.gt(10));

// Collect the definitions in the same order: manufacturer, cost, megapixels.
List<Facet> facets = new ArrayList<>();
facets.add(manufacturerFacet);
facets.add(costFacet);
facets.add(megapixelsFacet);

Stale results

Faceted search does not take the staleness of an index into account. You can't wait for non-stale results by customizing your query with one of the waitForNonStaleResultsXXX methods.