Operations: How to Add ETL
This API article demonstrates how to create various ETL tasks and the connection strings required.
-
You can add an ETL task by using the AddEtlOperation method.
-
ETL tasks are ongoing tasks that:
- Extract selected data from your source database when changes are made or new data is added.
- Apply a transform script on the data.
- Load the transformed data to a destination that you designate.
-
To learn more about ETL (Extract, Transfer, Load) ongoing tasks, see this article on ETL Basics.
-
To learn how to manage ETL tasks using the Studio see: RavenDB ETL Task.
In this page:
AddEtlOperation
public AddEtlOperation(EtlConfiguration<T> configuration)
Parameters | ||
---|---|---|
configuration | EtlConfiguration<T> |
ETL configuration where T is connection string type |
Add Raven ETL
Raven ETL tasks enable ongoing Extract, Transform, Load functionality from a RavenDB source database to a RavenDB destination.
Creating a Connection String for Raven ETL
Secure servers
To connect secure RavenDB servers you need to
- Export the server certificate from the source server.
- Install it as a client certificate on the destination server.
This can be done in the RavenDB Studio -> Server Management -> Certificates view.
- After passing the certificate, you can either create an ETL with a connection string and transformation script via the studio or with the following API.
{
//define connection string
var ravenConnectionString = new RavenConnectionString()
{
//name connection string
Name = "raven-connection-string-name",
//define appropriate node
//Be sure that the node definition in the connection string has the "s" in https
TopologyDiscoveryUrls = new[] { "https://127.0.0.1:8080" },
//define database to connect with on the node
Database = "Northwind",
};
//create the connection string
var resultRavenString = store.Maintenance.Send(
new PutConnectionStringOperation<RavenConnectionString>(ravenConnectionString));
}
Code Sample to Add Raven ETL
AddEtlOperation<RavenConnectionString> operation = new AddEtlOperation<RavenConnectionString>(
new RavenEtlConfiguration
{
ConnectionStringName = "raven-connection-string-name",
Name = "Employees ETL",
Transforms =
{
new Transformation
{
Name = "Script #1",
Collections =
{
"Employees"
},
Script = @"loadToEmployees ({
Name: this.FirstName + ' ' + this.LastName,
Title: this.Title
});"
}
},
// Do not prevent task failover to another node
PinToMentorNode = false
});
AddEtlOperationResult result = store.Maintenance.Send(operation);
Add Sql ETL
SQL ETL tasks enable ongoing Extract, Transform, Load functionality from RavenDB to SQL databases.
Creating a Connection String for Sql ETL
{
// define new connection string
PutConnectionStringOperation<SqlConnectionString> operation
= new PutConnectionStringOperation<SqlConnectionString>(
new SqlConnectionString
{
// name connection string
Name = "local_mysql",
// define FactoryName
FactoryName = "MySql.Data.MySqlClient",
// define database - may also need to define authentication and encryption parameters
// by default, encrypted databases are sent over encrypted channels
ConnectionString = "host=127.0.0.1;user=root;database=Northwind"
});
// create connection string
PutConnectionStringResult connectionStringResult
= store.Maintenance.Send(operation);
}
Code Sample to Add Sql ETL
AddEtlOperation<SqlConnectionString> operation = new AddEtlOperation<SqlConnectionString>(
new SqlEtlConfiguration
{
ConnectionStringName = "sql-connection-string-name",
Name = "Orders to SQL",
SqlTables = {
new SqlEtlTable {TableName = "Orders", DocumentIdColumn = "Id", InsertOnlyMode = false},
new SqlEtlTable {TableName = "OrderLines", DocumentIdColumn = "OrderId", InsertOnlyMode = false},
},
Transforms =
{
new Transformation
{
Name = "Script #1",
Collections =
{
"Orders"
},
Script = @"var orderData = {
Id: id(this),
OrderLinesCount: this.Lines.length,
TotalCost: 0
};
for (var i = 0; i < this.Lines.length; i++) {
var line = this.Lines[i];
orderData.TotalCost += line.PricePerUnit;
// Load to SQL table 'OrderLines'
loadToOrderLines({
OrderId: id(this),
Qty: line.Quantity,
Product: line.Product,
Cost: line.PricePerUnit
});
}
orderData.TotalCost = Math.round(orderData.TotalCost * 100) / 100;
// Load to SQL table 'Orders'
loadToOrders(orderData)"
}
},
// Do not prevent task failover to another node
PinToMentorNode = false
});
AddEtlOperationResult result = store.Maintenance.Send(operation);
Add OLAP ETL
Olap ETL is an ETL process that converts RavenDB data to the Apache Parquet file format
and sends it to local storage, cloud servers or File Transfer Protocol.
Creating a Connection String for Olap ETL
The following code sample is for a connection string to a local machine.
var myOlapConnectionString = new OlapConnectionString
{
Name = connectionStringName,
LocalSettings = new LocalSettings
{
FolderPath = path
}
};
var resultOlapString = store.Maintenance.Send
(new PutConnectionStringOperation<OlapConnectionString>(myOlapConnectionString));
To connect to a cloud instance, see the Olap ETL article.
The following code sample is for a connection string to Amazon AWS. If you use Google or Microsoft cloud servers, change the parameters accordingly.
var myOlapConnectionString = new OlapConnectionString
{
Name = "myConnectionStringName",
S3Settings = new S3Settings
{
BucketName = "myBucket",
RemoteFolderName = "my/folder/name",
AwsAccessKey = "myAccessKey",
AwsSecretKey = "myPassword",
AwsRegionName = "us-east-1"
}
};
var resultOlapString = store.Maintenance.Send(new PutConnectionStringOperation<OlapConnectionString>(myOlapConnectionString));
Code Sample to Add Olap ETL
AddEtlOperation<OlapConnectionString> operation = new AddEtlOperation<OlapConnectionString>(
new OlapEtlConfiguration
{
ConnectionStringName = "olap-connection-string-name",
Name = "Orders ETL",
Transforms =
{
new Transformation
{
Name = "Script #1",
Collections =
{
"Orders"
},
Script = @"var orderDate = new Date(this.OrderedAt);
var year = orderDate.getFullYear();
var month = orderDate.getMonth();
var key = new Date(year, month);
loadToOrders(key, {
Company : this.Company,
ShipVia : this.ShipVia
})"
}
}
});
AddEtlOperationResult result = store.Maintenance.Send(operation);
Add Elasticsearch ETL Task
Elasticsearch ETL Task tasks enable ongoing Extract, Transform, Load functionality from RavenDB to Elasticsearch destinations.
Creating a Connection String for Elasticsearch ETL
Add an Elasticsearch Connection String
// Create a Connection String to Elasticsearch
var elasticSearchConnectionString = new ElasticSearchConnectionString
{
// Connection String Name
Name = "ElasticConStr",
// Elasticsearch Nodes URLs
Nodes = new[] { "http://localhost:9200" },
// Authentication Method
Authentication = new Raven.Client.Documents.Operations.ETL.ElasticSearch.Authentication
{
Basic = new BasicAuthentication
{
Username = "John",
Password = "32n4j5kp8"
}
}
};
store.Maintenance.Send(new PutConnectionStringOperation<ElasticSearchConnectionString>(elasticSearchConnectionString));
Code Sample to Add Elasticsearch ETL
// Create an Elasticsearch ETL task
AddEtlOperation<ElasticSearchConnectionString> operation = new AddEtlOperation<ElasticSearchConnectionString>(
new ElasticSearchEtlConfiguration()
{
ConnectionStringName = elasticSearchConnectionString.Name, // Connection String name
Name = "ElasticsearchEtlTask", // ETL Task name
ElasticIndexes =
{
// Define Elasticsearch Indexes
new ElasticSearchIndex { // Elasticsearch Index name
IndexName = "orders",
// The Elasticsearch document property that will contain
// the source RavenDB document id.
// Make sure this property is also defined inside the
// transform script.
DocumentIdProperty = "DocId",
InsertOnlyMode = false },
new ElasticSearchIndex { IndexName = "lines",
DocumentIdProperty = "OrderLinesCount",
// If true, don't send _delete_by_query before appending docs
InsertOnlyMode = true
}
},
Transforms =
{ // Transformation script configuration
new Transformation()
{
// RavenDB collections that the script uses
Collections = { "Orders" },
Script = @"var orderData = {
DocId: id(this),
OrderLinesCount: this.Lines.length,
TotalCost: 0
};
// Write the `orderData` as a document to the Elasticsearch 'orders' index
loadToOrders(orderData);",
// Transformation script Name
Name = "TransformIDsAndLinesCount"
}
}
});
store.Maintenance.Send(operation);