Upload
hakka-labs
View
460
Download
2
Embed Size (px)
Citation preview
- Thousands of incoming requests/second
- Need to process data in real time
- Reliable fan-out
- Hundreds of unreliable APIs
Our Constraints
- Removed AMQP (segmentio/nsq.js)
- Co-located
- Distributed
- Simple and rock solid
Rabbit → NSQ
// Integration Factory
/**
 * Build a fresh `Integration` constructor for the given integration name.
 *
 * The returned constructor gets `name` on its prototype, then the members of
 * the outer `proto` object merged onto its prototype and the members of the
 * outer `statics` object merged onto the constructor itself. `proto`,
 * `statics`, `merge` and `debug` are defined outside this snippet.
 *
 * @param {string} name - value stored as `Integration.prototype.name`
 * @returns {Function} the newly created constructor
 */
function createIntegration(name){
  // Instances are configured in three steps, in this order: debug logger,
  // settings, then the `initialize()` hook.
  // NOTE(review): `slug()` and `initialize()` are not defined here; presumably
  // they arrive via `proto` — confirm.
  var Integration = function Integration(settings){
    var namespace = 'segmentio:integration:' + this.slug();
    this.debug = debug(namespace);
    this.settings = settings;
    this.initialize();
  };

  var prototype = Integration.prototype;
  prototype.name = name;       // instance-visible integration name
  merge(prototype, proto);     // shared instance methods
  merge(Integration, statics); // constructor-level (static) methods

  return Integration;
}
// MailChimp integration definition, assembled by chaining builder calls on
// the value returned from `integration('MailChimp')`.
// NOTE(review): `integration` and `mapper` are defined outside this snippet;
// the per-call notes below are inferred from the method names — confirm.
var MailChimp = integration('MailChimp')
  .channels(['server', 'mobile', 'client'])
  .endpoint('https://api.mailchimp.com/')
  // Declared requirements on settings and on the incoming message.
  .ensure('settings.datacenter')
  .ensure('settings.apiKey')
  // This requirement is scoped via `methods` to `identify` only.
  .ensure('settings.listId', { methods: ['identify'] })
  .ensure('message.email')
  // Map our input to our output.
  .mapper(mapper)
  .retries(2);

// Expose the configured integration as this module's export.
module.exports = MailChimp;
/**
 * Issue a GET to `/httpapi` carrying the API key from `this.settings` and the
 * JSON-stringified payload as the `event` query parameter.
 *
 * @param {*} payload - value serialized with `JSON.stringify` into `event`
 * @param {Function} fn - callback; receives `(err, res)` on a request error,
 *   a single error built by `this.error('invalid api_key')` when the response
 *   text is exactly that message, or `(null, res)` otherwise
 * @returns {*} whatever the request's `.end()` call returns
 */
Integration.prototype.track = function track(payload, fn){
  var integration = this;

  // Common request handling: build the request step by step, in the same
  // order as a single fluent chain would.
  var request = integration.get('/httpapi');
  request = request.type('json');
  request = request.query({ api_key: integration.settings.apiKey });
  request = request.query({ event: JSON.stringify(payload) });

  return request.end(onResponse);

  // Translate the raw response into the callback contract described above.
  function onResponse(err, res){
    if (err) {
      return fn(err, res);
    }
    // A rejected key is detected from the response text itself.
    var keyRejected = res.text == 'invalid api_key';
    if (keyRejected) {
      return fn(integration.error('invalid api_key'));
    }
    fn(null, res);
  }
};
// Retries

/**
 * Tell whether an error carries one of the HTTP status codes treated as
 * retryable here: 500, 502, 503, 504 or 429.
 *
 * The comparison is intentionally loose (`==`), so a numeric string such as
 * "503" matches as well as the number 503.
 *
 * @param {{status: *}} err - error object whose `status` is inspected
 * @returns {boolean} true when `err.status` is one of the codes above
 */
function status(err){
  return err.status == 500
    || err.status == 502
    || err.status == 503
    || err.status == 504
    || err.status == 429;
}
/**
 * Tell whether an error's `code` is one of the network-level error codes
 * listed below.
 *
 * Codes are checked in order with loose equality and the scan stops at the
 * first match.
 *
 * @param {{code: *}} err - error object whose `code` is inspected
 * @returns {boolean} true when `err.code` matches any listed code
 */
function network(err){
  var networkCodes = [
    'ECONNRESET',
    'ECONNREFUSED',
    'ECONNABORTED',
    'ETIMEDOUT',
    'EADDRINFO',
    'EHOSTUNREACH',
    'ENOTFOUND'
  ];

  return networkCodes.some(function(candidate){
    return err.code == candidate;
  });
}
API Errors Network Errors
// Retry strategy: requeue the message with a jittered, growing delay.
// NOTE(review): this was labelled "exponential backoff", but the delay is
// 15 * attempts^3, i.e. cubic in the attempt count — confirm which was intended.
// This is an excerpt from inside a handler that is not shown here (hence the
// bare `return`); `err`, `msg` and `jitter` come from that enclosing code.
if (err.retry) {
// Number of attempts recorded on the message so far.
var attempts = msg.attempts;
// Base delay of 15 * attempts^3, passed through `jitter`.
// NOTE(review): presumably `jitter` randomizes the delay; units of the
// resulting timeout are not visible here — TODO confirm.
var timeout = jitter(15*Math.pow(attempts, 3));
msg.requeue(timeout);
// Nothing else runs for this message once it has been requeued.
return;
}
- Microservices everywhere
- Docker for isolation
- Use metrics religiously
Microservices & Monitoring
# Declares the "google-analytics" worker by instantiating the local ./worker
# module with the values below.
# NOTE(review): how ./worker interprets each input is not visible here;
# the per-line notes are assumptions to confirm against that module.
module "google-analytics" {
source = "./worker"
# Presumably the cluster this worker is scheduled onto — confirm.
cluster = "integration-worker"
# Resource sizing; units are not stated here (TODO confirm against ./worker).
memory = "256"
cpu = "128"
name = "google-analytics"
# NOTE(review): "latest" is not a pinned version; with a local `source`,
# newer Terraform releases treat `version` and `count` as reserved module
# arguments — verify against the Terraform version in use.
version = "latest"
# Instance count is supplied by the caller through var.count.
count = "${var.count}"
}
Scaling your data pipeline
1. Queues not only define service boundaries, but also scheduling
2. Microservices and workers can provide great visibility and scalability — as long as they are easy to boot
3. The bigger your surface area, the more visibility and metrics you will need to provide
- In search of fairness
- Moving to Kafka
- Standard microservice toolkit
- Custom data transforms
What’s next?