LB simulator by Jonathan Ledlie
Model
Running the experiments
./lb -l pareto.60min.4096n.3hours -H 3600 -c gnutella.4096n -C 100 -w 10 -k Z/1.2/4096 -O zipf.v1.i1.ak -f 100 -v 1 -i 1 -s 1000
// Command-line help text for the `lb` simulator.
//
// The text is assembled from adjacent string literals, which the compiler
// concatenates into one string, so no backslash line continuations are needed.
// The "-a" synopsis lists every method described beneath it (t, p, k and g).
const char *usage =
    "Usage: lb, Distributions\n"
    " -l physical server lifetime and deathtime file\n"
    " -H average physical server lifetime\n"
    " -c physical server capacity file (requests/round/server)\n"
    " -C average physical server capacity\n"
    " -w queries per round per alive node\n"
    " -k routing key distribution\n\n"
    " Other parameters\n"
    " -i number of ID choices when joining (default is 1, i.e., off). Zero is opt\n"
    " -a [t|p|k|g] active load balancing method\n"
    " t: virtual server transfer:\n"
    " (1) If overloaded and have exactly one virtual server,\n"
    " split virtual server into two.\n"
    " (2) If overloaded and have more than one virtual server,\n"
    " attempt to transfer a virtual servers.\n"
    " Never delete or merge virtual servers.\n"
    " p: pure virtual servers:\n"
    " If overloaded this round and have more than one virtual server,\n"
    " choose the least loaded virtual server that will make us unloaded\n"
    " and delete it.\n"
    " If underloaded and adding an average virtual server workload\n"
    " will not put us over capacity,\n"
    " create a virtual server\n"
    " k: continue to use k-choices by actively rechoosing ids\n"
    " based on perceived workload\n"
    " g: use Ganesan's Threshold algorithm\n"
    " -U upper slack pct above which we are overloaded (default .90) \n"
    " -L lower slack pct below which we are underloaded (default .10)\n"
    " -s random seed\n"
    " -e epsilon difference between k-choice keys to make shift\n"
    " -o oracle mode\n"
    " - node VSs match object distribution\n"
    " - all nodes are up at continuously\n"
    " -d debug\n"
    " -f frequency of runtime summary output (once every 1000 rounds is default)\n"
    " -v initial virtual servers per node (1 is default)\n"
    " -S shift workload to new distribution halfway through run\n"
    " -O output file prefix\n"
    " -p k-choices active dampening method\n"
    " 1: limit ids\n"
    " 2: limit create and delete\n"
    " 3: limit both\n"
    " -m kchoices (active) min capacity, below which rechoosing is dampened\n\n"
    "Distributions:\n"
    " Pareto P/scale/shape\n"
    " Normal N/mean/stddev\n"
    " Zipf Z/alpha/num-elements\n"
    " Poisson F/mean\n"
    " Uniform U [0..1)\n"
    " Constant C/value\n";
Main programint main (int argc, char **argv) {
while (!finished) {DEBUG_P (("ROUND %d\n", cRound));Node Birth & Death
time: pareto distribution capacity: trace driven
VS Creation, Deletion, Splitting, and Transferfour protocols t/p/k/g
Finger Table UpdateQueries
Zipf distribution}
}
// A physical server (PS): one node of the simulated system.
// Only the members shown on this slide are declared here; other code in the
// deck calls further members (e.g. birth, death) that are not part of this
// excerpt.
class PhysicalServer {
private:
    int id;                        // node id, see setNode
    double capacity;               // node capacity, see setNode
    int rechooseAttempts;
    set<VirtualServer*> localVs;   // virtual servers hosted by this node (filled by addVs)
    void setNextLBactionTime ();   // schedules this node's next load-balancing action
    vector<double> keys;
    set<double> usedKeys;

public:
    void setNode (int id, double capacity);
    double getRandomKey ();

    // Message delivery; both return a success flag.
    bool sendMsg (int senderId, bool isQuery);
    bool sendMaintMsg (double dstKey);

    // Virtual-server management; each returns whether the action succeeded.
    bool createVs (int vsCreateCount, double desiredWork, double &predictedWork);
    bool splitVs ();
    bool transferVs (int &vsDelta);

    // Chooses one key out of keyList; predictedWork, cost and
    // distanceFromCenter are output parameters.
    double chooseKey (vector<double> keyList,
                      double desiredWork,
                      double &predictedWork,
                      double skipKey,
                      double &cost,
                      double &distanceFromCenter);

    // Creates a virtual server at `key` and registers it with this node.
    VirtualServer* addVs (double key);
};
// A virtual server (VS): identified by a key and hosted by a physical server
// (rootPs).  Only the members shown on this slide are declared here.
class VirtualServer {
private:
    double key;
    double previousKey;

public:
    PhysicalServer *rootPs;   // hosting physical server
    Fingers* fingers;         // finger table used when routing

    bool sendMsg (int senderId, bool isQuery);
    bool sendMsg (VirtualServer *sender, bool isQuery);

    // Routes towards dstKey.  `hops` and `fingersUsed` are appended to as the
    // route progresses; the return value reports success.
    bool route (double dstKey,
                vector<double> &hops,
                vector<int> &fingersUsed,
                bool isQuery);
    bool route (double dstKey,
                vector<double> &hops,
                vector<int> &fingersUsed,
                bool isQuery,
                double dstVsKeyCache);

    bool findHopDistance (double dstKey,
                          vector<double> &hops,
                          vector<int> &fingersUsed,
                          double dstVsKeyCache);

    void merge (VirtualServer *oldGuy);
    double getArcLength ();
};
Node actions
• birth/death (churn)– According to Pareto distribution– From file pareto.60min.4096n.3hours
• Capacity (heterogeneous)– According to gnutella trace– From file gnutella.4096n
0 b 00 b 10 b 100 b 1000 b 10000 b 1002…0 b 9992 b 4062 d 17303 d 26774 d 30065 b 13915 d 19505 d 35877 b 494
0 101 1062 103 104 106…4090 104091 104092 1064093 104094 14095 106
Node creation result
R 0 Action: b node 0R 0 PS 0 starting birthPS 0 create Vs key 0.894653R 0 PS 0 finished birth arc 0.000000 psUp 1 cap 10.000000 (l 0.100000 u 9.900000) lvsSize 1 sysCap 9.900000R 0 Action: b node 1R 0 PS 1 starting birthPS 1 create Vs key 0.128510R 0 PS 1 finished birth arc 0.233856 psUp 2 cap 106.000000 (l 1.060000 u 104.940000) lvsSize 1 sysCap 114.840000R 0 Action: b node 10R 0 PS 10 starting birthPS 10 create Vs key 0.885590R 0 PS 10 finished birth arc 0.757080 psUp 3 cap 106.000000 (l 1.060000 u 104.940000) lvsSize 1 sysCap 219.780000R 0 Action: b node 100
0 101 1062 103 104 1065 106 1067 108 1069 110 106
gnutella.4096n
//Non-oracle mode code
0 b 00 b 10 b 100 b 1000 b 10000 b 10020 b 1006
Node creation result
ROUND 2R 2 PS 406 starting birthPS 406 create Vs key 0.394836R 2 PS 406 finished birth arc 0.000488 psUp 2018 cap 106.000000 (l 1.060000 u 104.940000) lvsSize 1 sysCap 198304.920000PS 1730 death now 2 psUp 2018 vsSize 1PS 1730 removing key 0.127899PS 1730 remove Vs 0.127899
0 b 9970 b 9992 b 4062 d 17303 d 26774 d 30065 b 1391
Node creationpsCount = initNodes (capacityFile, ps, initialVsPerNode);
if (oracle) { //oracle mode}
While (1){ //Non-oracle mode code}
int initNodes (char *filename, PhysicalServer *&ps, int &initialVsPerNode) {
int psCount = 0; vector<int> capacity; filename="gnutella.4096n" ; // by goophy fp = fopen(filename, "r"); // node-id capacity int id, cap; while (fscanf (fp, "%d %d\n", &id, &cap) > 0) { capacity[id] = cap; psCount++; } ps = new PhysicalServer[psCount]; for (int i = 0; i < psCount; i++) { ps[i].setNode (i, (double)(capacity[i])); } initStep (psCount/2 * initialVsPerNode); return psCount;}
Node creation/oracle mode
// Load every node from the capacity file.
psCount = initNodes (capacityFile, ps, initialVsPerNode);

if (oracle) {
    // Oracle mode works with half of the loaded nodes.
    psCount /= 2;

    // Pre-allocate virtual servers according to the key distribution.
    if (keyDist->getName() == 'z') {
        oracleAllocateVsZipf (keyDist, ps);
    }
    else if (keyDist->getName() == 'u') {
        oracleAllocateVsUniform (keyDist, ps);
    }

    // Bring every remaining node up at once, without creating extra
    // virtual servers at birth.
    int node = 0;
    while (node < psCount) {
        ps[node].birth (0);
        ++node;
    }
    psUp = psCount;
}
Node creation/nonoracle modewhile (!finished && ((action = nextEvent (nodeId)) != ' ')) { int vsAct = 0; int myInitialVsCount = initialVsPerNode; switch (action) { case 'b': if (activeLBmethod == 'p') { //k-choices active dampening method myInitialVsCount = (int)(ceil (ps[nodeId].getUpperTarget()/10.)); vsAct = ps[nodeId].birth (myInitialVsCount); break; case 'd': vsAct = ps[nodeId].death (); break; case 'r': recordingStats = true; break; case 'w': printf ("shifting workload\n"); delete keyDist; keyDist = distFactory->newDistribution (keyDistStr); break; case 'q': finished = true; break; } }if (!oracle && cRound == 0) { mergeVServers ();}
// Brings this physical server up in the current round.
//
// Records cRound as the birth time, sets deathTime to -1 and resets the
// per-life state (util, usedKeys, haveDeletedVS, rechooseAttempts, threshold
// fields).  It then schedules the next load-balancing action and adds this
// node's upperTarget / capacity to the running totals currentTargetCapacity /
// systemTargetCapacity.
//
// Virtual server creation:
//  - idChoice == 1: attempts createVs() initialVsPerNode times and counts the
//    successes in createCount.
//  - otherwise: attempts a bounded number of creations (maxToCreate), aiming
//    at the midpoint between lowerTarget and upperTarget and subtracting the
//    predicted work of each attempt from the remaining desired work.
//
// Finally increments psUp and returns createCount.
//
// NOTE(review): this is a slide excerpt.  createCount is not declared in the
// visible code, and the second loop's condition contains an elision ("…"), so
// the exact termination rule cannot be read from here.
int PhysicalServer::birth (int initialVsPerNode) { birthTime = cRound; deathTime = -1; util = 0.; usedKeys.clear (); haveDeletedVS = false; rechooseAttempts = 0; thresholdLevel = 0; thresholdShift = false; setNextLBactionTime (); currentTargetCapacity += (double)(upperTarget); systemTargetCapacity += capacity; double VsDesiredWork = 0.; double VsPredictedWork = 0.; if (idChoice == 1) { //number of ID choices when joining, default for (int i = 0; i < initialVsPerNode; i++) {
if (createVs (initialVsPerNode, VsDesiredWork, VsPredictedWork)) { createCount++; } }
} else { int maxToCreate = 3; if (maxToCreate > idChoice) maxToCreate = idChoice / 2; if (cRound < earliestLBround) maxToCreate = 1; bool createOK = true; VsDesiredWork = (upperTarget - lowerTarget) / 2. + lowerTarget; for (int i = 0; createCount == 0 || … maxToCreate > i); i++) {
VsPredictedWork = 0.;createOK = createVs (initialVsPerNode, VsDesiredWork, VsPredictedWork);VsDesiredWork -= VsPredictedWork;
} } psUp++; return createCount;}
VS creation result
R 0 Action: b node 0R 0 PS 0 starting birthPS 0 create Vs key 0.894653R 0 PS 0 finished birth arc 0.000000 psUp 1 cap 10.000000 (l 0.100000 u 9.900000) lvsSize 1 sysCap 9.900000R 0 Action: b node 1R 0 PS 1 starting birthPS 1 create Vs key 0.128510R 0 PS 1 finished birth arc 0.233856 psUp 2 cap 106.000000 (l 1.060000 u 104.940000) lvsSize 1 sysCap 114.840000R 0 Action: b node 10R 0 PS 10 starting birthPS 10 create Vs key 0.885590R 0 PS 10 finished birth arc 0.757080 psUp 3 cap 106.000000 (l 1.060000 u 104.940000) lvsSize 1 sysCap 219.780000R 0 Action: b node 100
0 101 1062 103 104 1065 106 1067 108 1069 110 106
gnutella.4096n
//Non-oracle mode code
0 b 00 b 10 b 100 b 1000 b 10000 b 10020 b 1006
// createVs, first half (key selection); the excerpt ends with an elision.
//
// Asserts that the node is alive and reschedules its next load-balancing
// action, then picks the key for the new virtual server:
//  - idChoice != 1: when idChoice > 1, gives up (returns false) if the node
//    already hosts at least idChoice virtual servers, otherwise samples
//    candidate keys; the candidate count is recorded in statKeyListSize and
//    chooseKey() selects the key, filling predictedWork.
//  - idChoice == 1: uses a random key.
// The chosen key is recorded in allKeys.
bool PhysicalServer::createVs (int vsCreateCount, double desiredWork, double &predictedWork) {
ASSERT (alive); setNextLBactionTime (); double key; vector<double> keyList; if (idChoice != 1) { if (idChoice > 1) { if (localVs.size() >= idChoice) { return false; } keyList = getSampleKeys(); } statKeyListSize.push_back ((double)(keyList.size())); double cost, gap; key = chooseKey (keyList, desiredWork, predictedWork, -1., cost, gap); } else { key = getRandomKey (); } allKeys.insert (key); bool createdOK = true; …
PhysicalServer::createVs
PhysicalServer::createVs
// createVs, second half (insertion); the excerpt starts with an elision, so
// `key` and `createdOK` are set up in code that is not shown here.
//
//  - Round 0: the virtual server is simply added, with no merging.
//  - Later rounds: looks up the successor of `key` in vServers, wrapping to the
//    first entry past the end.
//      * Successor hosted by another physical server: the new virtual server
//        is added.  If the predecessor (wrapping to the last entry) is hosted
//        by this node, it is merged into the new virtual server and deleted,
//        and createdOK is set to false.
//      * Successor hosted by this node: creation is aborted and createdOK is
//        set to false.
// Returns createdOK.
bool PhysicalServer::createVs (int vsCreateCount, double desiredWork, double &predictedWork) {
… if (cRound == 0) {// Do not merge VSs right at beginning addVs (key); DEBUG_P (("PS %d create Vs key %f\n", id, key)); } else { map<double,VirtualServer*>::iterator succ, pred; succ = vServers.upper_bound (key); if (succ == vServers.end()) succ = vServers.begin(); if (succ->second->getRootPs() != this) {// merge, if we own our predecessor
pred = succ; if (pred == vServers.begin()) {pred = vServers.end(); } pred--; DEBUG_P (("PS %d create Vs key %f\n", id, key)); VirtualServer* newVs = addVs (key); if (pred->second->getRootPs() == this) {
DEBUG_P (("PS %d merging %f into %f\n", id, pred->first,key));newVs->merge (pred->second);int vsCount = 0; deleteLocalVs (pred, vsCount); createdOK = false;
} } else { DEBUG_P (("PS %d aborting creation of %f because we are successor\n",
id, key)); createdOK = false; } } return createdOK; }
// Creates a virtual server for `key` hosted by this physical server, records
// it in the global key -> virtual server map (vServers) and in this node's own
// set (localVs), and returns it.
VirtualServer* PhysicalServer::addVs (double key) {
    VirtualServer *created = new VirtualServer (key, this);
    ASSERT (created);

    // insert() leaves an existing entry for the same key untouched.
    vServers.insert (map<double,VirtualServer*>::value_type (key, created));
    localVs.insert (created);

    return created;
}
setNextLBactionTime
// Schedules the round of this node's next load-balancing action.
//
// When the threshold algorithm is in use the action is scheduled a fixed 60
// rounds ahead; otherwise the delay is drawn from a Poisson distribution with
// mean lbActionInterval.  The two cases are mutually exclusive: without the
// `else` the fixed threshold delay would always be overwritten by the Poisson
// draw.
void PhysicalServer::setNextLBactionTime () {
    if (usingThreshold) {
        nextLBactionTime = cRound + 60;
    } else {
        nextLBactionTime = cRound + (int)(poisson ((double)lbActionInterval));
    }
}
Main programint main (int argc, char **argv) {
while (!finished) {DEBUG_P (("ROUND %d\n", cRound));Node Birth & Death
time: pareto distribution capacity: trace driven
VS Creation, Deletion, Splitting, and Transferfour protocols t/p/k/g
Finger Table UpdateQueries
Zipf distribution}
}
VS actions
// Visits every physical server once per round and applies the active
// load-balancing method.  The loop runs only when a method is selected, the
// simulation is not in oracle mode, and cRound is past earliestLBround.
for (int i = 0;
     activeLBmethod != '-' && !oracle && cRound > earliestLBround && i < psCount;
     i++) {
    int vsCurrentSize = vServers.size();

    switch (activeLBmethod) {
    case 't':
        // transfer
        break;
    case 'p':
        // proportion
        break;
    case 'k':
        break;
    case 'g':
        break;
    default:
        break;
    }
}
VS actions: transferif (ps[i].isOverloaded ()) {
if (ps[i].getVsCount () == 1) {// split can fail if we are adjacent to our neighborif (ps[i].splitVs ()) {
ASSERT (vServers.size() == vsCurrentSize+1);interimStats->splitVs();
}} else {
int vsDelta = 0;if (ps[i].transferVs (vsDelta)) DEBUG_P (("successful transfer\n"));
}} // if isOverloaded
VS actions: proportion if (ps[i].isOverloaded () && ps[i].getVsCount () > 1) { int vsDelta = 0; ps[i].deleteVs(vsDelta); ASSERT (vServers.size() == vsCurrentSize+vsDelta); interimStats->deleteVs(); } else if (ps[i].isUnderloaded() && ps[i].canAddVs(maxVsPerNode)) { DEBUG_P (("PS %d about to createVs\n", i)); double targetWork = ps[i].getExtraCapacity(); double predictedWork = 0.; if (ps[i].createVs (1,targetWork,predictedWork)) { ASSERT (vServers.size() == vsCurrentSize+1); } // increment for any kind of activity interimStats->createVs(); }
VS actions: K-choices if (ps[i].isOverloaded () || ps[i].isUnderloaded()) { int vsDelta = 0; int actionCode = ps[i].rechooseVs (vsDelta); switch (actionCode) { case 0: break; case 1: interimStats->createVs(); break; case 2: interimStats->deleteVs(); break; case 3: interimStats->deleteVs(); interimStats->createVs(); break; default: ASSERT (0); } }
VS actions: g
int vsDelta = 0; int actionCode = ps[i].thresholdVs (vsDelta); switch (actionCode) { case 0: break; case 1: // did neighbor adjust break; case 2: // did reorder interimStats->deleteVs(); interimStats->createVs(); break; default: ASSERT (0); }
Main programint main (int argc, char **argv) {
while (!finished) {DEBUG_P (("ROUND %d\n", cRound));Node Birth & Death
time: pareto distribution capacity: trace driven
VS Creation, Deletion, Splitting, and Transferfour protocols t/p/k/g
Finger Table UpdateQueries
Zipf distribution}
}
R 0 Start of finger updatesVS 0.000061 fixing fingersR 0 PS 2108 sendMsg work 0R 0 VS 0.000061 set finger-1 for 0.000549 is 0.000641R 0 PS 2998 sendMsg work 0R 0 VS 0.000061 set finger-2 for 0.001038 is 0.002472R 0 PS 2998 sendMsg work 1R 0 VS 0.000061 set finger-3 for 0.002014 is 0.002472R 0 PS 3031 sendMsg work 0R 0 VS 0.000061 set finger-4 for 0.003967 is 0.004272R 0 PS 931 sendMsg work 0R 0 VS 0.000061 set finger-5 for 0.007874 is 0.009308R 0 PS 3871 sendMsg work 0R 0 VS 0.000061 set finger-6 for 0.015686 is 0.016144R 0 PS 3537 sendMsg work 0R 0 VS 0.000061 set finger-7 for 0.031311 is 0.031586R 0 PS 87 sendMsg work 0R 0 VS 0.000061 set finger-8 for 0.062561 is 0.063019R 0 PS 2230 sendMsg work 0R 0 VS 0.000061 set finger-9 for 0.125061 is 0.125153R 0 PS 730 sendMsg work 0R 0 VS 0.000061 set finger-10 for 0.250061 is 0.250641R 0 PS 1029 sendMsg work 0R 0 VS 0.000061 set finger-11 for 0.500061 is 0.500214
After round 00 b 9960 b 9970 b 9992 b 4062 d 17303 d 26774 d 30065 b 1391
// Finger updating
DEBUG_P (("R %d Start of finger updates\n", cRound));

/*
 * UPDATE FINGER POINTERS
 */
// Record how many virtual servers died / relocated gracefully this round.
deadVServersCountVector.push_back ((double)(deadVServers.size()));
gracefulVServersCountVector.push_back ((double)(gracefulRelocateVServers.size()));

// Outside oracle mode fingers are fixed every round; in oracle mode only in
// round 0.
if (!oracle || cRound == 0) {
    map<double,VirtualServer*>::iterator vsIt = vServers.begin();
    while (vsIt != vServers.end()) {
        int msgCount = vsIt->second->fixFingers ();
        interimStats->maintMsg (msgCount);
        ++vsIt;
    }
    deadVServers.clear ();
    gracefulRelocateVServers.clear ();
}

DEBUG_P (("R %d End of finger updates\n", cRound));
Main programint main (int argc, char **argv) {
while (!finished) {DEBUG_P (("ROUND %d\n", cRound));Node Birth & Death
time: pareto distribution capacity: trace driven
VS Creation, Deletion, Splitting, and Transferfour protocols t/p/k/g
Finger Table UpdateQueries
Zipf distribution}
}
QUERIES 1/2
DEBUG_P (("QUERIES\n")); int queryCount = (int)(ceil(psUp * queriesPerRound)); map<double,VirtualServer*>::iterator p = vServers.lower_bound (randPct()); vector<double> dstKeys; dstKeys.reserve (queryCount); for (int i = 0; i < queryCount; i++) { double dstKey = keyDist->next(); //Zipf or Uniform dstKeys.push_back (dstKey); } int queryIndex = -1; int querySuccessCount = 0; deque<bool> previousQuerySuccess; initializePreviousQuerySuccess (previousQuerySuccess);
Query generation
for (vector<double>::iterator q = dstKeys.begin(); q != dstKeys.end(); q++) { queryIndex++; double dstKey = *q; DEBUG_P (("Starting query for key %f\n",dstKey)); p++; if (p == vServers.end()) p = vServers.begin(); VirtualServer *srcVs = p->second; vector<double> hops; vector<int> fingersUsed; bool unlimitedRouteSuccess=srcVs->findHopDistance(dstKey,hops,fingersUsed,-1.); hops.clear(); fingersUsed.clear(); bool routeSuccess = srcVs->route (dstKey, hops, fingersUsed, true); int hopCount = hops.size()-1; if (hopCount < 0) hopCount = 0; if (routeSuccess)
DEBUG_P (("R %d query success for key %f in hops %d\n", cRound, dstKey, hopCount));
else DEBUG_P (("R %d query fail for key %f\n",cRound,dstKey));}
QUERIES 2/2Query processing
Query processing result
Starting query for key 0.428406Query: srcVs 0.137390 PS 3123 dstVs 0.428406PS 3123 VS 0.137390 query dstkey 0.428406PS 3123 VS 0.137390 using finger 10 to 0.388214(PS=2123) hop 0 for query of 0.428406PS 2123 VS 0.388214 query dstkey 0.428406PS 2123 VS 0.388214 using finger 6 to 0.404266(PS=2566) hop 1 for query of 0.428406PS 2566 VS 0.404266 query dstkey 0.428406R 0 PS 1989 got a packet but is overloaded work 14PS 2566 VS 0.404266 query key 0.428406 failedR 0 query fail for key 0.428406
Query processing result
Starting query for key 0.699829Query: srcVs 0.139557 PS 1130 dstVs 0.699829PS 1130 VS 0.139557 query dstkey 0.699829PS 1130 VS 0.139557 using finger 11 to 0.639587(PS=1619) hop 0 for query of 0.699829PS 1619 VS 0.639587 query dstkey 0.699829PS 1619 VS 0.639587 using finger 5 to 0.647430(PS=329) hop 1 for query of 0.699829PS 329 VS 0.647430 query dstkey 0.699829PS 329 VS 0.647430 using finger 6 to 0.663086(PS=1470) hop 2 for query of 0.699829R 0 query fail for key 0.699829
bool VirtualServer::route (double dstKey, vector<double> &hops, vector<int> &fingersUsed, bool isQuery,double dstVsKeyCache) { if (hops.size() > maxHops) maxHops = hops.size(); DEBUG_P (("PS %d VS %f query dstkey %f\n", rootPs->getId(), key, dstKey)); if (key == dstVsKeyCache) return true; //done double fingerKey = fingers->findFinger(key,dstKey,fingerIndex,dstVsKeyCache); map<double,VirtualServer*>::iterator fingerIter = vServers.find (fingerKey); VirtualServer *nextHop = NULL; if (fingerIter != vServers.end()) nextHop = fingerIter->second; if (nextHop->sendMsg (this,isQuery)) { //check if destination is overloaded or not fingers->heartbeat (fingerIndex); bool revisit = false; int minFinger = 100; for (int i = 0; i < hops.size(); i++) if (fingersUsed[i] < minFinger) minFinger = fingersUsed[i]; if (fingerIndex > minFinger) {increasingFingerStat++; return false; } hops.push_back(key); fingersUsed.push_back (fingerIndex); return nextHop->route (dstKey, hops, fingersUsed, isQuery, dstVsKeyCache); } else // destination is overloaded routeSendMsgFailedDist[nextHop->getRootPs()->getCapacityIndex()]++; return false; // "PS %d VS %f query key %f failed\n", rootPs->getId(), key, dstKey));}
Actual processing/msg routing