寻找无序数组中第k大的数

时间:2022-12-30 15:36:46

对于一个无序的数组,怎样找到其中第k大的数呢?下面总结几种方法。

1.直接排序法

使用常见的归并排序、堆排序等算法对数组进行排序,然后找到第k大的数。排序算法的时间复杂度为O(nlogn),所以算法总的时间复杂度为O(nlogn)。

// Simple C++ program to find k'th biggest element
#include<iostream>
#include<algorithm>
using namespace std;

// Function to return k'th biggest element in a given array
int kthBiggest(int arr[], int n, int k)
{
    // Sort the given array
    sort(arr, arr+n);

    // Return k'th element in the sorted array
    return arr[n+k-1];
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20};
    int n = (a, sizeof(a) / sizeof(int));
    cout << "K'th biggest element is " <<  kthBiggest(arr, n, 2);
    return 0;
}

2.堆选择法-最大堆

可以使用最大堆对数组建立堆,只是需要找到第k大的数所以没有必要像直接排序方法中那样对所有数进行排序。建立好最大队后每次都提取出最大的数,提取 k 次就得到了第 k 大的数。建立最大堆的时间复杂度为O(n),提取最大数后每次调整最大队时间复杂为O(logn),所以总的时间复杂度为O(n+k*logn)。

// A C++ program to find k'th smallest element using min heap
#include<iostream>
#include<climits>
using namespace std;

// Prototype of a utility function to swap two integers
void swap(int *x, int *y);

// A class for Max Heap
class MaxHeap
{
    int *harr; // pointer to array of elements in heap
    int capacity; // maximum possible size of max heap
    int heap_size; // Current number of elements in max heap
public:
    MaxHeap(int a[], int size); // Constructor
    void maxHeapify(int i);  //To maxHeapify subtree rooted with index i
    int parent(int i) { return (i - 1) / 2; }
    int left(int i) { return (2 * i + 1); }
    int right(int i) { return (2 * i + 2); }

    int extractMax();  // extracts root (maximum) element
    int getMax() { return harr[0]; } // Returns maximum

    // to replace root with new node x and heapify() new root
    void replaceMax(int x) { harr[0] = x;  maxHeapify(0); }
};

MaxHeap::MaxHeap(int a[], int size)
{
    heap_size = size;
    harr = a;  // store address of array
    int i = (heap_size - 1) / 2;
    while (i >= 0)
    {
        maxHeapify(i);
        i--;
    }
}

// Method to remove maximum element (or root) from max heap
int MaxHeap::extractMax()
{
    if (heap_size == 0)
        return INT_MAX;

    // Store the maximum vakue.
    int root = harr[0];

    // If there are more than 1 items, move the last item to root
    // and call heapify.
    if (heap_size > 1)
    {
        harr[0] = harr[heap_size - 1];
        maxHeapify(0);
    }
    heap_size--;

    return root;
}

// A recursive method to heapify a subtree with root at given index
// This method assumes that the subtrees are already heapified
void MaxHeap::maxHeapify(int i)
{
    int l = left(i);
    int r = right(i);
    int largest = i;
    if (l < heap_size && harr[l] > harr[i])
        largest = l;
    if (r < heap_size && harr[r] > harr[largest])
        largest = r;
    if (largest != i)
    {
        swap(&harr[i], &harr[largest]);
        maxHeapify(largest);
    }
}

// A utility function to swap two elements
void swap(int *x, int *y)
{
    int temp = *x;
    *x = *y;
    *y = temp;
}

// Function to return k'th largest element in a given array
int kthBiggest(int arr[], int n, int k)
{
    // Build a heap of all n numbers
    MaxHeap mh(arr, n);

    int num = 0;
    // every time extract biggest number , k times
    for (int i = 0; i < k; i++)
        num = mh.extractMax();

    return num;
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
    int n = (a, sizeof(a) / sizeof(int));
    cout << "K'th biggest element is " << kthBiggest(a, n, 3);
    return 0;
}

3.排序选择法-插入排序

由于是要找 k 个最大的数,所以没有必要对所有数进行完整的排序。每次只保留 k 个当前最大的数就可以,然后每次对新来的元素跟当前 k 个树中最小的数比较,新元素大的话则插入到数组中,否则跳过。循环结束后数组中最小的数即是我们要找到第 k 大的数。
时间复杂度 (n-k)logk

// A C++ program to find k'th smallest element using min heap
#include<iostream>

using namespace std;
void print_array(int a[], int low, int high)
{
    for (int i = low; i <= high; i++){
        cout << a[i] << " ";
    }
    cout << endl;
}

void insert(int a[], int low, int high, int num)
{
    int i = high-1;
    while (i >= low && a[i] < num){
        a[i + 1] = a[i];
        i--;
    }
    a[i + 1] = num;
}

void insertSort(int a[], int low, int high)
{
    for (int i = low + 1; i <= high; i++)
    {
        int temp = a[i];
        int j = i - 1;
        while (j >= low && a[j] < temp)
        {
            a[j + 1] = a[j];
            j--;
        }
        a[j + 1] = temp; //do not swap every time
    }
}

// Function to return k'th largest element in a given array
int kthBiggest(int arr[], int n, int k)
{
    int *a = new int[k];
    for (int i = 0; i < k; i++){
        a[i] = arr[i];
    }
    insertSort(a, 0, k - 1);

    for (int i = k; i < n ; i++){
        if (arr[i]>a[k - 1]) insert(a, 0, k-1, arr[i]);
    }

    int num = a[k - 1];
    delete[] a;
    return num;
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
    int n = (a, sizeof(a) / sizeof(int));
    print_array(a, 0, n-1);
    cout << "K'th biggest element is " << kthBiggest(a, n, 3);
    return 0;
}

4.排序选择法-最小堆

上一方法中的临时数组可以替换为最小堆,来找到数组中k个最大的数。先对前 k 个数进行建堆,堆顶为当前k个数组中最小的元素。时间复杂度O(k)。
然后将第 k+1…n 个数依次和堆顶元素比较,如果新元素大于堆顶元素,则将它作为新的堆顶元素并调整最小堆,否则再处理下一个数。这样每一次循环都将保证最小堆中的k个数为当前最大的 k 个数。 时间复杂度O((n-k)*logk)。
循环结束后,堆顶元素即是我们要找的第 k 大的数。
算法总的时间复杂度是O(k+(n-k)*logk)。

// A C++ program to find k'th smallest element using min heap
#include<iostream>
#include<climits>
using namespace std;

// Prototype of a utility function to swap two integers
void swap(int *x, int *y);

// A class for min Heap
class MinHeap
{
    int *harr; // pointer to array of elements in heap
    int capacity; // maximum possible size of max heap
    int heap_size; // Current number of elements in max heap
public:
    MinHeap(int a[], int size); // Constructor
    void MinHeapify(int i);  //To minHeapify subtree rooted with index i
    int parent(int i) { return (i - 1) / 2; }
    int left(int i) { return (2 * i + 1); }
    int right(int i) { return (2 * i + 2); }

    int extractMin();  // extracts root (minimum) element
    int getMin() { return harr[0]; } // Returns minimum

    // to replace root with new node x and heapify() new root
    void replaceMax(int x) { harr[0] = x;  MinHeapify(0); }
};

MinHeap::MinHeap(int a[], int size)
{
    heap_size = size;
    harr = a;  // store address of array
    int i = (heap_size - 1) / 2;
    while (i >= 0)
    {
        MinHeapify(i);
        i--;
    }
}

// Method to remove minimum element (or root) from min heap
int MinHeap::extractMin()
{
    if (heap_size == 0)
        return INT_MAX;

    // Store the minimum vakue.
    int root = harr[0];

    // If there are more than 1 items, move the last item to root
    // and call heapify.
    if (heap_size > 1)
    {
        harr[0] = harr[heap_size - 1];
        MinHeapify(0);
    }
    heap_size--;

    return root;
}


// A recursive method to heapify a subtree with root at given index
// This method assumes that the subtrees are already heapified
void MinHeap::MinHeapify(int i)
{
    int l = left(i);
    int r = right(i);
    int smallest = i;
    if (l < heap_size && harr[l] < harr[i])
        smallest = l;
    if (r < heap_size && harr[r] < harr[smallest])
        smallest = r;
    if (smallest != i)
    {
        swap(&harr[i], &harr[smallest]);
        MinHeapify(smallest);
    }
}

// A utility function to swap two elements
void swap(int *x, int *y)
{
    int temp = *x;
    *x = *y;
    *y = temp;
}

// Function to return k'th largest element in a given array
int kthBiggest(int arr[], int n, int k)
{
    // Build a heap of first k elements: O(k) time
    MinHeap mh(arr, k);

    // Process remaining n-k elements. If current element is
    // bigger than root, replace root with current element
    for (int i = k; i < n; i++)
    if (arr[i] > mh.getMin())
        mh.replaceMax(arr[i]);

    // Return root
    return mh.getMin();
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
    int n = (a, sizeof(a) / sizeof(int));
    cout << "K'th biggest element is " << kthBiggest(a, n, 3);
    return 0;
}

5.期望线性时间的方法-随机选择法(使用快速排序)

回忆一下快排算法,每次都根据主元将数组划分为两部分,一部分数全部比主元要小,另一部分全部比主元要大,划分完之后可以得到主元的位置,也就是可以知道主元是数组中第多大的数。那么可以根据此信息,来找到第 k 大的数,每次都只对划分后的一部分数进行处理。如果主元刚好是第 k 大的数则直接返回主元就可以了。否则更具主元的位置来缩小查找空间,迭代查找。
为了得到期望的时间复杂度为线性时间复杂度,快速排序中的主元的选择使用随机数。
该算法的期望时间复杂度为O(n)

// C++ implementation of randomized quickSelect
#include<iostream>
#include<climits>
#include<cstdlib>
using namespace std;

int randomPartition(int arr[], int l, int r);

// This function returns k'th biggest element in arr[l..r] using
// QuickSort based method. ASSUMPTION: ELEMENTS IN ARR[] ARE DISTINCT
int kthBiggest(int arr[], int l, int r, int k)
{
    // If k is smaller than number of elements in array
    if (k > 0 && k <= r - l + 1)
    {
        // Partition the array around a random element and
        // get position of pivot element in sorted array
        int pos = randomPartition(arr, l, r);

        // If position is same as k
        if (pos - l == k - 1)
            return arr[pos];
        if (pos - l > k - 1)  // If position is more, recur for left subarray
            return kthBiggest(arr, l, pos - 1, k);

        // Else recur for right subarray
        return kthBiggest(arr, pos + 1, r, k - pos + l - 1);
    }

    // If k is more than number of elements in array
    return INT_MIN;
}

void swap(int *a, int *b)
{
    int temp = *a;
    *a = *b;
    *b = temp;
}

// Standard partition process of QuickSort(). It considers the last
// element as pivot and moves all smaller element to right of it and
// greater elements to left. This function is used by randomPartition()
int partition(int arr[], int l, int r)
{
    int x = arr[r], i = l;
    for (int j = l; j <= r - 1; j++)
    {
        if (arr[j] > x)
        {
            swap(&arr[i], &arr[j]);
            i++;
        }
    }
    swap(&arr[i], &arr[r]);
    return i;
}

// Picks a random pivot element between l and r and partitions
// arr[l..r] arount the randomly picked element using partition()
int randomPartition(int arr[], int l, int r)
{
    int n = r - l + 1;
    int pivot = rand() % n;
    swap(&arr[l + pivot], &arr[r]);
    return partition(arr, l, r);
}

// Driver program to test above methods
int main()
{
    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
    int n = (a, sizeof(a) / sizeof(int));
    cout << "K'th biggest element is " << kthBiggest(a, 0, n - 1, 2);
    return 0;
}

6.最坏时间为线性时间的方法-BFPRT

算法导论中介绍了一种方法,能保证最坏情况下该算法依旧是线性时间复杂度。
算法步骤:
1.将数组的 n 个数划分为 n/5 组,每组 5 个元素。
2.寻找这 n/5 组中每一组的中位数:首先对每组元素进行插入排序,然后确定每组有序元素的中位数。
3.对第 2 步中找出的 n/5 个中位数,递归调用 BFPRT 查找算法本身来找到这 n/5 个数的中位数 x
4.利用修改过的划分函数,按中位数的中位数 x 对数组进行划分,返回划分完之后 x 的位置。
5.如果 x 刚好是第 k 大的数(划分后大于 x 的数放在前一部分,小于 x 的数放在后半部分,如 x 的位置索引是k-1,则说明 x 是第k大的数),直接返回 x ;若 x 的索引小于k-1,则说明第 k 大的元素在右半部分小于 x 的数中,对右半部分进行递归查找;若 x 的索引大于k-1,则说明第 k 大的元素在左半部分大于 x 的数中,对左半部分进行递归查找。

可以用一个递归式来得到算法的时间复杂度。令T(n)为时间复杂度,则1、2、4都需要O(n)时间,步骤三需要T(n/5),步骤5最多需要 T(7n/10+6),所以递归式:
T(n) <= T(n/5)+T(7n/10)+O(n)
可以求得T(n) = O(n)

//implement max heap sort
#include "iostream"
#include "math.h"

void print_array(int a[], int beg, int end)
{
    for (int i = beg; i <= end; i++)
        std::cout <<  a[i] << " ";
    std::cout << std::endl;
}

// partition and rank from big to small
// ran partition use the a[pivot] as pivot
// and return the new index of the a[pivot]
int partition(int a[], int low, int high, int pivot)
{
    std::swap(a[high], a[pivot]);
    //the index of the first place for replaced
    int index_first_nosmall = low;

    for (int i = low; i < high; i++)
    {
        if (a[i] > a[high])
        {
            std::swap(a[i], a[index_first_nosmall++]);
        }
    }
    std::swap(a[high], a[index_first_nosmall]);
    return index_first_nosmall;
}

//insert sort, from big to small
void InsertSort(int a[], int low, int high)
{
    for (int i = low+1; i <= high; i++)
    {
        int temp = a[i];
        int j = i - 1;
        while (j >= low && a[j] < temp)
        {
            a[j + 1] = a[j];
            j--;
        }
        a[j+1] = temp; //do not swap every time
    }
}

//insert sort
void InsertSort_Lite(int a[], int low, int high)
{
    for (int i = low + 1; i <= high; i++)
    {
        int j = i - 1;
        while (j >= low && a[j] < a[j+1])
        {
            std::swap(a[j], a[j+1]);
            j--;
        }
    }
}

int Find_k_Biggest(int a[], int low, int high, int k)
{
    std::cout << "print_array(a, low, high):" << low << " , " << high <<" k: "<<k<< std::endl;
    print_array(a, low, high);

    if (high + 1 - low <= 5)
    {
        InsertSort(a, low, high);
        return a[low + k - 1];
    }

    int j = low;
    for (int i = 0; i <= high; i+=5)
    {
        int end = (i + 4 <= high ? (i + 4) : high);
        InsertSort(a, i, end);
        print_array(a, i, end);
        std::swap(a[j++], a[(i+end)>>1]);
        if (end != i + 4) break;
    }
    print_array(a, low, high);

    //find the median of the medians
    int pivot = (low + j -1) >> 1;
    Find_k_Biggest(a, low, j-1, pivot+1-low);

    //use pivot to partition the array
    int resulted_pivot = partition(a, low, high, pivot);
    int current_rank = resulted_pivot + 1 - low;

    if (current_rank == k) return a[resulted_pivot];
    else if (resulted_pivot < k) return Find_k_Biggest(a, resulted_pivot+1, high, k-resulted_pivot);
    else return Find_k_Biggest(a, low, resulted_pivot - 1, k);
}

int main()
{

    int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20};
    int n = (a, sizeof(a) / sizeof(int));

    print_array(a, 0, n-1);

    std::cout << Find_k_Biggest(a, 0, n - 1, 2) << std::endl;;
    print_array(a, 0, n-1);

    return 0;
}

参考

http://www.geeksforgeeks.org/kth-smallestlargest-element-unsorted-array/
http://www.geeksforgeeks.org/kth-smallestlargest-element-unsorted-array-set-2-expected-linear-time/
http://www.geeksforgeeks.org/kth-smallestlargest-element-unsorted-array-set-3-worst-case-linear-time/