union-find disjoint sets
Summary
Union-find disjoint sets(UFDS)
- store disjoint sets
- union sets or check if two elements are in the same set
Hash table implementation
- table values are pointers to the parent entry
- harder to keep track of rank and size
| Operation | Method | Performance |
|---|---|---|
findSet(i) | follow the pointer until the the entry where the pointer points to itself | - |
unionSet(i,j) | change the pointer of one of the roots to the other | - |
assuming path compression is used, might deteriorate to
otherwise
Concept
UFDS operations
findSet(i)- get an identifier for the set that
ibelongs to
- get an identifier for the set that
unionSet(i,j)- join the sets that
iandjbelong to into one set
- join the sets that
Abstraction
- as a forest, where each tree represents a disjoint set/partition
- the root acts as the identifier for that set
- 3 disjoint sets
Path compression
- keep the trees as flat as possible
java
public int findSet(int i) {
if (p.get(i) == i) // end case when we reach the root node
return i;
else {
int ret = findSet(p.get(i)); // traverse further to find the root
p.set(i, ret); // set the root as the parent of the current node, flattening
return ret;
}
}
- flattening a very tall tree:
findSet(3)
even though
findSet(i)might beon tall trees, using path compression amortizes it to
Union by rank hueristic
- rank != height, since height might decrease after path compression, but rank won’t
- join the shorter tree into the taller tree
- if they are the same rank, then the rank of the union only increases by 1
java
public void unionSet(int i, int j) {
if (!isSameSet(i, j)) { // ensure that they are not already part of the same set
numSets--;
int x = findSet(i), y = findSet(j);
// rank is used to keep the tree short
if (rank.get(x) > rank.get(y)) {
p.set(y, x); // put j's set under i's, if it has a smaller rank
setSize.set(x, setSize.get(x) + setSize.get(y));
} else {
p.set(x, y); // put i's set under j's by default
setSize.set(y, setSize.get(y) + setSize.get(x));
if (rank.get(x) == rank.get(y))
rank.set(y, rank.get(y) + 1);
}
}
}
- union of two trees:
unionSet(1,0)
- union by root element on two trees of equal rank:
unionSet(4,0)
rank/height will increase by at most 1
- union with path compression on two trees of equal rank:
unionSet(6,2)
rank doesn’t decrease like height might, and for trees of equal initial rank,
iis unioned intoj
Tall trees
- minimum size of the tree in order of a certain height
- since height increases by 1 at most
Inverse ackermann function
- a function that grows very slowly
Application
Kattis: Tildes
java
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
PrintWriter pw = new PrintWriter(System.out);
String[] one = br.readLine().split(" ");
int n = Integer.parseInt(one[0]), q = Integer.parseInt(one[1]);
UnionFind uf = new UnionFind(n + 1);
for (int i = 0; i < q; i++) {
String[] query = br.readLine().split(" ");
if (query[0].equals("t"))
uf.unionSet(Integer.parseInt(query[1]), Integer.parseInt(query[2]));
else
pw.println(uf.sizeOfSet(Integer.parseInt(query[1])));
}
pw.close();
// use the UFDS implementation
class UnionFind { ...
Leetcode: Making A Large Island
- inefficient
solution
java
int n = grid.length;
UnionFind uf = new UnionFind(n * n); // n^2 size
int max = 1;
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (grid[i][j] == 1) { // check all the islands
int idx = i * n + j;
// union with islands below and to the right, make a big island
if (i + 1 < n && grid[i + 1][j] == 1) {
uf.unionSet(idx, idx + n);
max = Math.max(max, uf.sizeOfSet(idx)); // update max
}
if (j + 1 < n && grid[i][j + 1] == 1) {
uf.unionSet(idx, idx + 1);
max = Math.max(max, uf.sizeOfSet(idx));
}
}
}
}
int[] directions = { 0, 1, 0, -1, 0 }; // 4 cardinal directions
Set<Integer> hs = new HashSet<>();
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (grid[i][j] == 0) { // check all non islands
for (int a = 0; a < 4; a++) {
int r = i + directions[a], s = j + directions[a + 1];
if (0 <= r && r < n && 0 <= s && s < n && grid[r][s] != 0)
hs.add(uf.findSet(r * n + s)); // identify the islands that this tile is adjacent to, ignore duplicates
}
int size = 1;
for (int t : hs)
size += uf.sizeOfSet(t); // compute the potential size of joining every adjacent island
max = Math.max(max, size);
hs.clear();
}
}
}
return max;
// use the UFDS implementation
class UnionFind { ...
Extra
Java implementation - from CPBook
java
import java.util.*;
// Union-Find Disjoint Sets Library written in OOP manner, using both path compression and union by rank heuristics
class UnionFind { // OOP style
private ArrayList<Integer> p, rank, setSize;
private int numSets;
public UnionFind(int N) {
p = new ArrayList<>(N);
rank = new ArrayList<>(N);
setSize = new ArrayList<>(N);
numSets = N;
for (int i = 0; i < N; i++) {
p.add(i);
rank.add(0);
setSize.add(1);
}
}
public int findSet(int i) {
if (p.get(i) == i) return i;
else {
int ret = findSet(p.get(i)); p.set(i, ret);
return ret; } }
public Boolean isSameSet(int i, int j) { return findSet(i) == findSet(j); }
public void unionSet(int i, int j) {
if (!isSameSet(i, j)) { numSets--;
int x = findSet(i), y = findSet(j);
// rank is used to keep the tree short
if (rank.get(x) > rank.get(y)) { p.set(y, x); setSize.set(x, setSize.get(x) + setSize.get(y)); }
else { p.set(x, y); setSize.set(y, setSize.get(y) + setSize.get(x));
if (rank.get(x) == rank.get(y)) rank.set(y, rank.get(y) + 1); } } }
public int numDisjointSets() { return numSets; }
public int sizeOfSet(int i) { return setSize.get(findSet(i)); }
}
class unionfind_ds {
public static void main(String[] args) {
System.out.printf("Assume that there are 5 disjoint sets initially\n");
UnionFind UF = new UnionFind(5); // create 5 disjoint sets
System.out.printf("%d\n", UF.numDisjointSets()); // 5
UF.unionSet(0, 1);
System.out.printf("%d\n", UF.numDisjointSets()); // 4
UF.unionSet(2, 3);
System.out.printf("%d\n", UF.numDisjointSets()); // 3
UF.unionSet(4, 3);
System.out.printf("%d\n", UF.numDisjointSets()); // 2
System.out.printf("isSameSet(0, 3) = %b\n", UF.isSameSet(0, 3)); // will return false
System.out.printf("isSameSet(4, 3) = %b\n", UF.isSameSet(4, 3)); // will return true
for (int i = 0; i < 5; i++) // findSet will return 1 for {0, 1} and 3 for {2, 3, 4}
System.out.printf("findSet(%d) = %d, sizeOfSet(%d) = %d\n", i, UF.findSet(i), i, UF.sizeOfSet(i));
UF.unionSet(0, 3);
System.out.printf("%d\n", UF.numDisjointSets()); // 1
for (int i = 0; i < 5; i++) // findSet will return 3 for {0, 1, 2, 3, 4}
System.out.printf("findSet(%d) = %d, sizeOfSet(%d) = %d\n", i, UF.findSet(i), i, UF.sizeOfSet(i));
}
}