-
Notifications
You must be signed in to change notification settings - Fork 99
Expand file tree
/
Copy pathConnectedComponents.java
More file actions
147 lines (121 loc) · 5.45 KB
/
ConnectedComponents.java
File metadata and controls
147 lines (121 loc) · 5.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
package edu.cmu.graphchi.apps;
import edu.cmu.graphchi.*;
import edu.cmu.graphchi.datablocks.IntConverter;
import edu.cmu.graphchi.engine.GraphChiEngine;
import edu.cmu.graphchi.engine.VertexInterval;
import edu.cmu.graphchi.preprocessing.EdgeProcessor;
import edu.cmu.graphchi.preprocessing.FastSharder;
import edu.cmu.graphchi.preprocessing.VertexProcessor;
import edu.cmu.graphchi.util.LabelAnalysis;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.logging.Logger;
/**
* Example application for computing the weakly connected components
* of a graph. The algorithm uses label exchange: each vertex first chooses
* a label equaling its id; on the subsequent iterations each vertex sets
* its label to be the minimum of the neighbors' labels and its current label.
* Algorithm finishes when no labels change. Each vertex with same label belongs
* to same component.
* @author akyrola
*/
public class ConnectedComponents implements GraphChiProgram<Integer, Integer> {
private static Logger logger = ChiLogger.getLogger("connectedcomponents");
public void update(ChiVertex<Integer, Integer> vertex, GraphChiContext context) {
final int iteration = context.getIteration();
final int numEdges = vertex.numEdges();
/* On first iteration, each vertex chooses a label equalling its id */
if (iteration == 0) {
vertex.setValue(vertex.getId());
/* Schedule the vertex itself for execution on next iteration */
context.getScheduler().addTask(vertex.getId());
}
/* Choose the smallest id of neighbor vertices. Each vertex
writes its label to its edges, so it can be accessed by neighbors.
*/
int curMin = vertex.getValue();
for(int i=0; i < numEdges; i++) {
int nbLabel = vertex.edge(i).getValue();
if (iteration == 0) nbLabel = vertex.edge(i).getVertexId(); // Note!
if (nbLabel < curMin) {
curMin = nbLabel;
}
}
/**
* Set my new label
*/
vertex.setValue(curMin);
int label = curMin;
/**
* Broadcast my value to neighbors by writing the value to my edges.
*/
if (iteration > 0) {
for(int i=0; i < numEdges; i++) {
if (vertex.edge(i).getValue() > label) {
vertex.edge(i).setValue(label);
context.getScheduler().addTask(vertex.edge(i).getVertexId());
}
}
} else {
// Special case for first iteration to avoid overwriting
for(int i=0; i < vertex.numOutEdges(); i++) {
vertex.outEdge(i).setValue(label);
}
}
}
public void beginIteration(GraphChiContext ctx) {}
public void endIteration(GraphChiContext ctx) {}
public void beginInterval(GraphChiContext ctx, VertexInterval interval) {}
public void endInterval(GraphChiContext ctx, VertexInterval interval) {}
public void beginSubInterval(GraphChiContext ctx, VertexInterval interval) {}
public void endSubInterval(GraphChiContext ctx, VertexInterval interval) {}
/**
* Initialize the sharder-program.
* @param graphName
* @param numShards
* @return
* @throws java.io.IOException
*/
protected static FastSharder createSharder(String graphName, int numShards) throws IOException {
return new FastSharder<Integer, Integer>(graphName, numShards, new VertexProcessor<Integer>() {
public Integer receiveVertexValue(int vertexId, String token) {
return 0;
}
}, new EdgeProcessor<Integer>() {
public Integer receiveEdge(int from, int to, String token) {
return 0;
}
}, new IntConverter(), new IntConverter());
}
/**
* Usage: java edu.cmu.graphchi.demo.ConnectedComponents graph-name num-shards filetype(edgelist|adjlist)
* For specifying the number of shards, 20-50 million edges/shard is often a good configuration.
*/
public static void main(String[] args) throws Exception {
String baseFilename = args[0];
int nShards = Integer.parseInt(args[1]);
String fileType = (args.length >= 3 ? args[2] : null);
/* Create shards */
FastSharder sharder = createSharder(baseFilename, nShards);
if (baseFilename.equals("pipein")) { // Allow piping graph in
sharder.shard(System.in, fileType);
} else {
if (!new File(ChiFilenames.getFilenameIntervals(baseFilename, nShards)).exists()) {
sharder.shard(new FileInputStream(new File(baseFilename)), fileType);
} else {
logger.info("Found shards -- no need to preprocess");
}
}
/* Run GraphChi ... */
GraphChiEngine<Integer, Integer> engine = new GraphChiEngine<Integer, Integer>(baseFilename, nShards);
engine.setEdataConverter(new IntConverter());
engine.setVertexDataConverter(new IntConverter());
engine.setEnableScheduler(true);
engine.run(new ConnectedComponents(), 5);
logger.info("Ready. Going to output...");
/* Process output. The output file has format <vertex-id, component-id> */
LabelAnalysis.computeLabels(baseFilename, engine.numVertices(), engine.getVertexIdTranslate());
logger.info("Finished. See file: " + baseFilename + ".components");
}
}