union-find disjoint sets

Complete

Summary

Union-find disjoint sets(UFDS)

  • store disjoint sets
  • union sets or check if two elements are in the same set

Hash table implementation

  • table values are pointers to the parent entry
  • harder to keep track of rank and size
OperationMethodPerformance
findSet(i)follow the pointer until the the entry where the pointer points to itself-
unionSet(i,j)change the pointer of one of the roots to the other-

assuming path compression is used, might deteriorate to otherwise

012345678

Concept

UFDS operations

  • findSet(i)
    • get an identifier for the set that i belongs to
  • unionSet(i,j)
    • join the sets that i and j belong to into one set

Abstraction

  • as a forest, where each tree represents a disjoint set/partition
  • the root acts as the identifier for that set
  • 3 disjoint sets
012345678

Path compression

  • keep the trees as flat as possible
java
public int findSet(int i) { 
	if (p.get(i) == i) // end case when we reach the root node
		return i;
	else {
		int ret = findSet(p.get(i)); // traverse further to find the root
		p.set(i, ret); // set the root as the parent of the current node, flattening
		return ret; 
	} 
}
0123401234

even though findSet(i) might be on tall trees, using path compression amortizes it to

Union by rank hueristic

  • rank != height, since height might decrease after path compression, but rank won’t
  • join the shorter tree into the taller tree
  • if they are the same rank, then the rank of the union only increases by 1
java
public void unionSet(int i, int j) { 
	if (!isSameSet(i, j)) { // ensure that they are not already part of the same set
		numSets--;
		int x = findSet(i), y = findSet(j);
		// rank is used to keep the tree short
		if (rank.get(x) > rank.get(y)) { 
			p.set(y, x); // put j's set under i's, if it has a smaller rank
			setSize.set(x, setSize.get(x) + setSize.get(y)); 
		} else {
			p.set(x, y); // put i's set under j's by default
			setSize.set(y, setSize.get(y) + setSize.get(x));
			if (rank.get(x) == rank.get(y)) 
				rank.set(y, rank.get(y) + 1); 
		} 
	} 
}
03123012
012345678012345678

rank/height will increase by at most 1

012345678012345786012345786

rank doesn’t decrease like height might, and for trees of equal initial rank, i is unioned into j

Tall trees

  • minimum size of the tree in order of a certain height
  • since height increases by 1 at most
0123h=2;s=401234567h=3;s=8

Inverse ackermann function

  • a function that grows very slowly

Application

Kattis: Tildes

java
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
PrintWriter pw = new PrintWriter(System.out);

String[] one = br.readLine().split(" ");
int n = Integer.parseInt(one[0]), q = Integer.parseInt(one[1]);

UnionFind uf = new UnionFind(n + 1);

for (int i = 0; i < q; i++) {
	String[] query = br.readLine().split(" ");
	if (query[0].equals("t"))
		uf.unionSet(Integer.parseInt(query[1]), Integer.parseInt(query[2]));
	else
		pw.println(uf.sizeOfSet(Integer.parseInt(query[1])));
}

pw.close();

// use the UFDS implementation
class UnionFind { ...

Leetcode: Making A Large Island

  • inefficient solution
java
int n = grid.length;

UnionFind uf = new UnionFind(n * n); // n^2 size
int max = 1;
for (int i = 0; i < n; i++) {
	for (int j = 0; j < n; j++) {
		if (grid[i][j] == 1) { // check all the islands
			int idx = i * n + j;
			// union with islands below and to the right, make a big island
			if (i + 1 < n && grid[i + 1][j] == 1) {
				uf.unionSet(idx, idx + n);
				max = Math.max(max, uf.sizeOfSet(idx)); // update max
			}
			if (j + 1 < n && grid[i][j + 1] == 1) {
				uf.unionSet(idx, idx + 1);
				max = Math.max(max, uf.sizeOfSet(idx));
			}
		}
	}
}

int[] directions = { 0, 1, 0, -1, 0 }; // 4 cardinal directions
Set<Integer> hs = new HashSet<>();

for (int i = 0; i < n; i++) {
	for (int j = 0; j < n; j++) {
		if (grid[i][j] == 0) { // check all non islands
			for (int a = 0; a < 4; a++) {
				int r = i + directions[a], s = j + directions[a + 1];
				if (0 <= r && r < n && 0 <= s && s < n && grid[r][s] != 0)
					hs.add(uf.findSet(r * n + s)); // identify the islands that this tile is adjacent to, ignore duplicates
			}
				int size = 1;
				for (int t : hs)
					size += uf.sizeOfSet(t); // compute the potential size of joining every adjacent island
				max = Math.max(max, size);
				hs.clear();
		}
	}
}

return max;

// use the UFDS implementation
class UnionFind { ...

Extra

Java implementation - from CPBook

java
import java.util.*;

// Union-Find Disjoint Sets Library written in OOP manner, using both path compression and union by rank heuristics
class UnionFind {                                              // OOP style
  private ArrayList<Integer> p, rank, setSize;
  private int numSets;

  public UnionFind(int N) {
    p = new ArrayList<>(N);
    rank = new ArrayList<>(N);
    setSize = new ArrayList<>(N);
    numSets = N;
    for (int i = 0; i < N; i++) {
      p.add(i);
      rank.add(0);
      setSize.add(1);
    }
  }

  public int findSet(int i) { 
    if (p.get(i) == i) return i;
    else {
      int ret = findSet(p.get(i)); p.set(i, ret);
      return ret; } }

  public Boolean isSameSet(int i, int j) { return findSet(i) == findSet(j); }

  public void unionSet(int i, int j) { 
    if (!isSameSet(i, j)) { numSets--; 
    int x = findSet(i), y = findSet(j);
    // rank is used to keep the tree short
    if (rank.get(x) > rank.get(y)) { p.set(y, x); setSize.set(x, setSize.get(x) + setSize.get(y)); }
    else                           { p.set(x, y); setSize.set(y, setSize.get(y) + setSize.get(x));
                                     if (rank.get(x) == rank.get(y)) rank.set(y, rank.get(y) + 1); } } }
  public int numDisjointSets() { return numSets; }
  public int sizeOfSet(int i) { return setSize.get(findSet(i)); }
}

class unionfind_ds {
  public static void main(String[] args) {
    System.out.printf("Assume that there are 5 disjoint sets initially\n");
    UnionFind UF = new UnionFind(5); // create 5 disjoint sets
    System.out.printf("%d\n", UF.numDisjointSets()); // 5
    UF.unionSet(0, 1);
    System.out.printf("%d\n", UF.numDisjointSets()); // 4
    UF.unionSet(2, 3);
    System.out.printf("%d\n", UF.numDisjointSets()); // 3
    UF.unionSet(4, 3);
    System.out.printf("%d\n", UF.numDisjointSets()); // 2
    System.out.printf("isSameSet(0, 3) = %b\n", UF.isSameSet(0, 3)); // will return false
    System.out.printf("isSameSet(4, 3) = %b\n", UF.isSameSet(4, 3)); // will return true
    for (int i = 0; i < 5; i++) // findSet will return 1 for {0, 1} and 3 for {2, 3, 4}
      System.out.printf("findSet(%d) = %d, sizeOfSet(%d) = %d\n", i, UF.findSet(i), i, UF.sizeOfSet(i));
    UF.unionSet(0, 3);
    System.out.printf("%d\n", UF.numDisjointSets()); // 1
    for (int i = 0; i < 5; i++) // findSet will return 3 for {0, 1, 2, 3, 4}
      System.out.printf("findSet(%d) = %d, sizeOfSet(%d) = %d\n", i, UF.findSet(i), i, UF.sizeOfSet(i));
  }
}