Sorting

A Stellar Hiker

2014-12-25

Algorithm, C++, Sort, leetcode

常见的排序算法

插入排序

Straight Insertion Sort
Runtime: $O(n^2)$ . Memory: $O(1)$ . 稳定。

插入排序算法解释

基本操作是将一个记录插入到已经排好序的有序表中，从而得到一个新的、记录数增1的有序表。
从左到右渐次处理，始终保证当前位置左边的数组是有序的，然后将当前位置的数字插入到左侧的有序数组中。

代码

#include <iostream>
void print_array(int *nums, int n)
{
	if (!nums || n < 1) return;
	for (int i = 0; i < n; ++i)
	{
		std::cout<<nums[i]<<" ";
	}
	std::cout<<std::endl;
}
void insert_sort(int* num_array, int n)
{
	if(!num_array || n < 2) return;
	for (int i = 1; i < n; ++i)
	{
		if (num_array[i] < num_array[i-1])
		{
			int num(num_array[i]);
			int j(i-1);
			while (num_array[j] > num && j >= 0)
			{
				num_array[j+1] = num_array[j];
				--j;
			}
			num_array[j+1] = num;
		}
	}
}
int main()
{
	int test1[1] = {1};
	int test2[2] = {2, 1};
	int test3[10] = {12, 6, 84, 25, 67, 0, -56, -2, 5, -100};
	insert_sort(NULL, 0);
	insert_sort(test1, 1);
	insert_sort(test2, 2);
	insert_sort(test3, 10);
	print_array(NULL, 0);
	print_array(test1, 1);
	print_array(test2, 2);
	print_array(test3, 10);
	system("Pause");
}

用模板的写法：

#include <iterator>

// 对vector等类型应该进行模板特化以提速，使用RandomAccessIterator的特性。
template<typename ForwardIterator, typename BinaryPredicate>
void insert_sort(ForwardIterator begin, ForwardIterator end, BinaryPredicate comp)
{
	iterator_traits<ForwardIterator>::difference_type count;
	count = std::distance(begin, end);
	if (count <= 0)
		return;
	ForwardIterator cur(begin);
	while (std::distance(cur, end) > 1)
	{
		ForwardIterator left(cur);
		advance(cur, 1);
		ForwardIterator right(cur);
		while (!comp(*left, *right))
		{
			std::swap(*left, *right);
			if (std::distance(begin, left) < 1)
				break;
			std::advance(left, -1);	// use advance instead of ++ to meet limit of ForwardIterator
			std::advance(right, -1);
		}
	}
}

希尔排序

Shell’s Sort
Runtime: $O(n^1.25)$ ~ $O(1.6n^1.25)$ ，是n和d的函数. Memory: $O(1)$ . 不稳定。

希尔排序算法解释

也称作“缩小增量排序”，也是一种属于插入排序类的方法。
基本思想是，先将整个待排记录序列分割成为若干子序列分别进行直接插入排序，待整个序列中的记录“基本有序”时，再对全体记录进行一次直接插入排序。

代码

/*
#include <iostream>
#include <vector>
void print_array(int *nums, int n)
{
	if (!nums || n < 1) return;
	for (int i = 0; i < n; ++i)
	{
		std::cout<<nums[i]<<" ";
	}
	std::cout<<std::endl;
}
void shell_insert(int* num_array, int n, int dk)
{
	for (int i = dk; i < n; i += dk)
	{
		if (num_array[i] < num_array[i-dk])
		{
			int num(num_array[i]);
			int j(i - dk);
			while (num_array[j] > num && j >= 0)
			{
				num_array[j+dk] = num_array[j];
				j -= dk;
			}
			num_array[j+dk] = num;
		}
	}
}
void shell_sort(int* num_array, int n, int* dlta, int t)
{
	// 按增量序列dlta[0...t-1]对顺序表num_array作希尔排序。
	for (int k = 0; k < t; ++k)
	{
		shell_insert(num_array, n, dlta[k]);
	}
}
int main()
{
	int test1[1] = {1};
	int test2[2] = {2, 1};
	int test3[10] = {12, 12, 84, 25, 67, 0, 56, 2, 5, 100};
	int dlta[3] = {5, 3, 1};
	shell_sort(NULL, 0, dlta, 3);
	shell_sort(test1, 1, dlta, 3);
	shell_sort(test2, 2, dlta, 3);
	shell_sort(test3, 10, dlta, 3);
	print_array(NULL, 0);
	print_array(test1, 1);
	print_array(test2, 2);
	print_array(test3, 10);
	system("Pause");
}
*/

用模板的写法：

#include <iterator>

template<typename ForwardIterator, typename BinaryPredicate>
void insert_sort_inside_shell(ForwardIterator begin, ForwardIterator end, int dist, BinaryPredicate comp)
{
	iterator_traits<ForwardIterator>::difference_type count;
	count = std::distance(begin, end);	
	if (count <= 0 || dist <= 0 || count < dist+1)
		return;

	ForwardIterator cur(begin);
	while (std::distance(cur, end) > dist)
	{
		ForwardIterator left(cur);
		advance(cur, dist);
		ForwardIterator right(cur);
		while (!comp(*left, *right))
		{
			std::swap(*left, *right);
			if (std::distance(begin, left) < dist)
				break;
			advance(left, -dist);
			advance(right, -dist);
		}
	}
}

template<typename ForwardIterator, typename BinaryPredicate>
void shell_sort(ForwardIterator begin, ForwardIterator end, std::vector<int> dists, BinaryPredicate comp)
{
	if (dists.empty())
		return;
	iterator_traits<ForwardIterator>::difference_type count;
	count = std::distance(begin, end);
	if (count <= 0)
		return;
	for (auto i = dists.begin(); i != dists.end(); ++i)
	{
		for (int j = 0; j < *i; ++j)
		{
			ForwardIterator cur(begin);
			advance(cur, j);
			insert_sort_inside_shell(cur, end, *i, comp);
		}
	}
}

冒泡排序

Bubble Sort
Runtime: $O(n^2)$ average and worst case. Memory: $O(1)$ . 稳定。

冒泡排序算法解释：

每次遍历比较表中的相邻元素，如果它们是逆序的话就交换它们的位置。重复n-1次后，列表即排好序。

代码

// 假设列表是数组
void buble_sort(int* A, int n)
{
	for (int i = n-1; i > 0; --i)
	{
		for (int j = 0; j < i; ++j)
		{
			if (A[j] > A[j+1]) {swap(A[j], A[j+1]);}
		}
	}
}

用模板的方式：

#include <iterator>

template<typename ForwardIterator, typename BinaryPredicate>
void bubble_sort(ForwardIterator begin, ForwardIterator end, BinaryPredicate comp)
{
	iterator_traits<ForwardIterator>::difference_type count;
	count = std::distance(begin, end);
	if (count < 2)
		return;
	
	for (int i = (count - 1); i > 0; --i)
	{
		for (int j = 1; j <= i; ++j)
		{
			ForwardIterator left(begin), right(begin);
			advance(left, j - 1);
			advance(right, j);
			if (!comp(*left, *right))
			{
				swap(*left, *right);
			}
		}
	}
}

选择排序

Selection Sort
Runtime: $O(n^2)$ average and worst case. Memory: $O(1)$ . 稳定。

选择排序算法解释：

第一遍遍历从1到n的n个元素，将找到的最小元素与位置1的元素交换。第二遍遍历从2到n的n-1个元素，将找到的最小元素与位置2的元素交换。以此类推。n-1遍后，该列表就排好序了。

代码

void selection_sort(int* A, int n)
{
	for (int i = 0; i < (n - 1); ++i)
	{
		int min_index(i);
		for (int j = i + 1; j < n; ++j)
		{
			if (A[j] < A[min_index]) {min_index = j;}
		}
		swap(A[i], A[min_index]);
	}
}

堆排序

Heap Sort
Runtime: $O(nlogn)$ . Memory: $O(1)$ . 不稳定。

堆排序算法解释

左右无序，上下有序的完全二叉树，称为堆。在输出堆顶的最小值之后，使得剩余n-1个元素的序列重又建成一个堆，则得到n个元素中的次小值。如此反复执行，便能得到一个有序序列，这个过程称之为堆排序。
初始建立堆。输出堆顶元素后，调整剩余元素成为一个新的堆。适用于n较大的文件。
从一个无序序列堆建堆的过程就是一个反复筛选的过程。若将此序列看成一个完全二叉树，则最后一个非终端结点是第n/2个元素，就从它开始。
筛选的过程是自上而下，堆顶元素与其左右子树比较并调整的过程。

代码

#include <iostream>
#include <vector>
void print_array(int *nums, int n)
{
	if (!nums || n < 1) return;
	for (int i = 0; i < n; ++i)
	{
		std::cout<<nums[i]<<" ";
	}
	std::cout<<std::endl;
}
void swap(int& left, int& right)
{
	if (left == right) return;
	left = left ^ right;
	right = left ^ right;
	left = left ^ right;
}
void heap_adjust(int* num_array, int start, int end)
{
	if (!num_array || start >= end) return;
	// 已知num_array[start...end]中记录的关键字除了num_array[start]外均满足
	// 堆的定义。本函数调整num_array[start]，使num_array[start...end]成为一
	// 个大顶堆。
	int i(start * 2 + 1);
	// 沿当前元素较大的孩子节点向下筛选。
	while (i <= end)
	{
		if (i < end && num_array[i + 1] > num_array[i])	// i为当前元素较大的记录的下标。
			++i;
		if (num_array[start] > num_array[i])	// num就应该在当前的位置上。
			break;
		swap(num_array[start], num_array[i]);	// 这个比较大的孩子与父节点交换位置。。
		start = i;	// 待比较位置改到下一层孩子的位置。
		i = i * 2 + 1;
	}
}
void heap_sort(int* num_array, int n)
{
	if (!num_array || n < 2) return;
	for (int i = n/2-1; i >= 0; --i)
	{
		heap_adjust(num_array, i, n-1);	// 把num_array建成大顶堆。
	}
	for (int i = n-1; i > 0; --i)
	{
		swap(num_array[0], num_array[i]);	// 每次把堆顶最大的那个放到最后一个未排序的位置。
		heap_adjust(num_array, 0, i-1);
	}
}
int main()
{
	int test1[1] = {1};
	int test2[2] = {2, 1};
	int test3[10] = {12, 12, 84, -25, 67, 0, 56, 2, 5, 100};
	heap_sort(NULL, 0);
	heap_sort(test1, 1);
	heap_sort(test2, 2);
	heap_sort(test3, 10);
	print_array(NULL, 0);
	print_array(test1, 1);
	print_array(test2, 2);
	print_array(test3, 10);
	system("Pause");
}

使用模板：

#include <iterator>


template<typename ForwardIterator, typename BinaryPredicate>
void heap_adjust(ForwardIterator begin, int start_idx, int end_idx, BinaryPredicate comp)
{
	int cur(2 * start_idx + 1);
	while (cur < end_idx)
	{
		ForwardIterator father(begin), son(begin);
		advance(father, start_idx);
		advance(son, cur);
		if (cur < end_idx - 1)
		{
			ForwardIterator sibling(son);
			++sibling;
			if (comp(*son, *sibling))
			{
				son = sibling;
				++cur;
			}
		}
		if (comp(*father, *son))
			break;
		swap(*father, *son);
		start_idx = start_idx * 2 + 1;
		cur = cur * 2 + 1;
	}
}

template<typename ForwardIterator, typename BinaryPredicate>
void heap_sort(ForwardIterator begin, ForwardIterator end, BinaryPredicate comp)
{
	// special cases
	iterator_traits<ForwardIterator>::difference_type count;
	count = std::distance(begin, end);
	if (count < 2)
		return;
	//build heap from n/2
	for (int i = (count + 1) / 2 - 1; i >= 0; --i)
	{
		heap_adjust(begin, i, count, comp);
	}
	// sort, swap max and last
	for (int i = count - 1; i > 0; --i)
	{
		ForwardIterator cur(begin);
		advance(cur, i);
		swap(*begin, *cur);
		heap_adjust(begin, 0, i, comp);
	}
}

STL中的Heap操作

make_heap() 把一序列元素变成heap。
push_heap() 添加一个元素到heap。
pop_heap() 从heap中移除下一个元素。
sort_heap() 把一个heap转换成一个有序队列，此后就不再是heap了。

归并排序

Merge Sort
Runtime: $O(nlog(n))$ average and worst case. Memory: Depends. 稳定。

归并排序算法解释：

归并排序将数组分成两半，然后分别对其排序，然后再将它们合并到一起。每次的排序操作应用都是这个算法，是合并操作承担了主要的工作。
合并操作将两半数组拷入到一个帮助数组里，分别用helperLeft和helperRight记录两半数组各自的头元素。然后遍历helper，把小的那个元素拷到目标数组。最后剩下的元素拷到目标数组后面。

代码

#include <iostream>
void merge_sort(int* num_array, int* helper_array, int low, int high);
void merge(int* num_array, int* helper_array, int low, int middle, int high);
void merge_sort(int* num_array, int n)
{
	if (!num_array || n < 2) return;
	int* helper_array = new int[n];
	merge_sort(num_array, helper_array, 0, n-1);
}
void merge_sort(int* num_array, int* helper_array, int low, int high)
{
	if (low >= high) return;
	int middle(low + (high - low) / 2);
	merge_sort(num_array, helper_array, low, middle);	// sort left half
	merge_sort(num_array, helper_array, middle+1, high);	// sort right half
	merge(num_array, helper_array, low, middle, high);	// merge them
}
void merge(int* num_array, int* helper_array, int low, int middle, int high)
{
	// Copy both halves into a helper array
	for (int i = low; i <= high; ++i)
	{
		helper_array[i] = num_array[i];
	}
	int helper_left(low);
	int helper_right(middle+1);
	int current(low);
	// Iterate through helper array. Compare the left and right half, 
	// copying back the smaller element from the two halves into the
	// original array.
	while (helper_left <= middle && helper_right <= high)
	{
		if (helper_array[helper_left] <= helper_array[helper_right])
		{
			num_array[current] = helper_array[helper_left];
			++helper_left;
		}else
		{
			num_array[current] = helper_array[helper_right];
			++helper_right;
		}
		++current;
	}
	// Copy the rest of the left side of the array into the target array.
	int remaining = middle - helper_left;
	// Notice only the remaining elements form the left half of the helper
	// array are copied into the target array. Cause the right half it's 
	// ALREADY there.
	for (int i = 0; i <= remaining; ++i)
	{
		num_array[current + i] = helper_array[helper_left + i];
	}
}
void print_array(int *nums, int n)
{
	if (!nums || n < 1) return;
	for (int i = 0; i < n; ++i)
	{
		std::cout<<nums[i]<<" ";
	}
	std::cout<<std::endl;
}
int main()
{
	int test1[1] = {1};
	int test2[2] = {2, 1};
	int test3[10] = {12, 6, 84, 25, 67, 0, -56, -2, 5, -100};
	merge_sort(NULL, 0);
	merge_sort(test1, 1);
	merge_sort(test2, 2);
	merge_sort(test3, 10);
	print_array(NULL, 0);
	print_array(test1, 1);
	print_array(test2, 2);
	print_array(test3, 10);
	system("Pause");
}

快速排序

Quick Sort
Runtime: $O(nlog(n))$ average, $O(n^2)$ worst case. Memory: $O(log(n))$ . 不稳定。

快速排序算法解释：

在快速排序算法中，我们随机选择一个元素并分割数组，将小于所选元素的元素放在大于所选元素的元素之前。分组操作可以用一系列swap操作高效实现。
持续分割数组，数组最终会完全排序。不过我们所选择的元素无法保证接近中值，所以最坏情况下的时间复杂度是 $O(n^2)$ 。

代码

#include <iostream>
void print_array(int *nums, int n)
{
	if (!nums || n < 1) return;
	for (int i = 0; i < n; ++i)
	{
		std::cout<<nums[i]<<" ";
	}
	std::cout<<std::endl;
}
void swap(int* num_array, int left, int right)
{
	if (!num_array || left >= right || num_array[left] == num_array[right]) return;
	num_array[left] = num_array[left] ^ num_array[right];
	num_array[right] = num_array[left] ^ num_array[right];
	num_array[left] = num_array[left] ^ num_array[right];
}
int partition(int* num_array, int left, int right);
void quick_sort(int* num_array, int left, int right)
{
	if (!num_array || left >= right) return;
	int index = partition(num_array, left, right);
	if (left < index - 1)	// Sort left half
	{
		quick_sort(num_array, left, index - 1);
	}
	if (right > index)	// Sort right half
	{
		quick_sort(num_array, index, right);
	}
}
int partition(int* num_array, int left, int right)
{
	int pivot = num_array[(left + right) / 2];	// Pick pivot point
	while (left <= right)
	{
		// Find element on left that should be on right
		while (num_array[left] < pivot && left <= right) ++left;
		// Find element on right that should be on left
		while (num_array[right] > pivot && left <= right) --right;
		// Swap elements, and move left and right indices
		if (left <= right)
		{
			swap(num_array, left, right);	// Swaps elements
			++left;
			--right;
		}
	}
	return left;
}
int main()
{
	int test1[1] = {1};
	int test2[2] = {2, 1};
	int test3[10] = {12, 12, 84, 25, 67, 0, -56, -2, 5, -100};
	quick_sort(NULL, 0, -1);
	quick_sort(test1, 0, 0);
	quick_sort(test2, 0, 1);
	quick_sort(test3, 0, 9);
	print_array(NULL, 0);
	print_array(test1, 1);
	print_array(test2, 2);
	print_array(test3, 10);
	system("Pause");
}

模板实现的partition:

template<typename ForwardIterator, typename UnaryPredicate>
ForwardIterator partition(ForwardIterator first, ForwardIterator last, UnaryPredicate pred)
{
		auto pos = first;
		for (; first != last; ++first)
			if (pred(*first))
				swap(*first, *pos++);
		return pos;
}

基数排序

Radix Sort
Runtime: $O(kn)$ , n is the number of elements, k is the number of passes of the sorting algorithm. 稳定。

基数排序算法解释：

基数排序是利用整型数只有有限数目的比特数的特点，只对正整数（或一些其他的数据结构）进行排序的算法。在基数排序中，我们遍历一个数目的每一个数字，按数字将数目分组。
举例来说，如果我们有一列整型数字，可以先按最后一个数字排序，所有0结尾的放在一组。然后我们再对前一个数字继续排序。重复这个过程，直到整个数组都已排序。先从最低位开始排，最后到最高位。

代码

#include <iostream>
#include <vector>
void print_array(int *nums, int n)
{
	if (!nums || n < 1) return;
	for (int i = 0; i < n; ++i)
	{
		std::cout<<nums[i]<<" ";
	}
	std::cout<<std::endl;
}
void radix_sort(int* num_array, int n, int k)
{
	if (!num_array || n < 2 || k < 1) return;
	std::vector<int> bucket[10];
	// Iterate through each radix until i > k
	for (int i = 1; i <= k; i *= 10)
	{
		// sort array of numbers into buckets, from the smallest radix to the largest
		for (int j = 0; j < n; ++j)
		{
			bucket[(num_array[j] / i) % 10].push_back(num_array[j]);
		}
		// merge buckets back to array
		for (int num = 0, m = 0; m < 10; ++m)
		{
			for (std::vector<int>::iterator it = bucket[m].begin(); it != bucket[m].end(); ++it)
			{
				num_array[num++] = *it;
			}
			bucket[m].clear();
		}
	}
}
int main()
{
	int test1[1] = {1};
	int test2[2] = {2, 1};
	int test3[10] = {12, 12, 84, 25, 67, 0, 56, 2, 5, 100};
	radix_sort(NULL, 0, 100);
	radix_sort(test1, 1, 100);
	radix_sort(test2, 2, 100);
	radix_sort(test3, 10, 100);
	print_array(NULL, 0);
	print_array(test1, 1);
	print_array(test2, 2);
	print_array(test3, 10);
	system("Pause");
}

比较

排序方法	平均时间	最坏情况	辅助存储
插入排序	$O(n^2)$	$O(n^2)$	$O(1)$
冒泡排序	$O(n^2)$	$O(n^2)$	$O(1)$
简单选择排序	$O(n^2)$	$O(n^2)$	$O(1)$
快速排序	$O(nlogn)$	$O(n^2)$	$O(logn)$
堆排序	$O(nlogn)$	$O(nlogn)$	$O(1)$
归并排序	$O(nlogn)$	$O(nlogn)$	$O(n)$
基数排序	$O(d(n+rd))$	$O(d(n+rd))$	$O(rd)$

从平均时间性能而言，快速排序最佳，其所需时间最省，但快速排序在最坏情况下的时间性能不如堆排序和归并排序。而后两者比较的结果是，在n较大时，归并排序所需时间较堆排序省，但它所需的辅助存储量最多。
插入排序、冒泡排序、简单选择排序都是简单排序，其中直接插入排序最简单。当序列中的记录“基本有序”或n值较小时，它是最佳的排序方法，因此常将它和其他的排序方法，诸如快速排序、归并排序等结合在一起使用。
基数排序适用于n值很大而关键字较小的序列。若关键字也很大，而序列中大多数记录的“最高位关键字”均不同，则亦可先按“最高位关键字”不同将序列分成若干“小”的子序列，而后进行直接插入排序。
从方法的稳定性来比较，基数排序是稳定的内部排序方法，所有时间复杂度为 $O(n^2)$ 的简单排序法也是稳定的。然而，快速排序、堆排序和希尔排序等时间性能较好的排序方法都是不稳定的。

leetcode相关题目

单链表适合用归并排序，双向链表适合用快速排序。
用标准库的sort时，注意活用各种可能的sorting标准。
Merge Sorted Array
Merge Two Sorted Lists
Merge k Sorted Lists
Insertion Sort List
Sort List
First Missing Positive
Sort Colors
Merge Intervals
Insert Interval
Largest Number
Maximum Gap
H-Index
Valid Anagram
Wiggle Sort II

[1] Cracking the Code Interview
[2] leetcode
[3] 数据结构(C语言版)