"""Search algorithms over ascending-sorted sequences.

Contents (originally ``search/binary_search.py`` followed by
``test/test_search.py``):

* ``binary_search`` / ``binary_search_recur`` -- classic bisection.
* ``interpolation_search`` -- probes where the target "should" be on a
  straight line drawn between the two end points of the current range.
* ``exponential_search`` -- doubles an upper bound until it passes the
  target, then bisects the last doubled interval.

Return convention shared by every search function: the index of a matching
element, or ``False`` when the value is absent.  Because ``0 == False`` in
Python, callers that may legitimately hit index 0 must test the result with
``result is False`` rather than relying on truthiness.
"""


def _binary_search_range(sequence, value, low, high):
    """Bisect the inclusive index range ``[low, high]`` of ``sequence``.

    Args:
        sequence: Indexable collection sorted in ascending order.
        value: Target to look for.
        low: First index of the range to search.
        high: Last index of the range to search (inclusive).

    Returns:
        The index of an element equal to ``value``, or ``False`` if the range
        does not contain it (including when the range is empty).
    """
    # ``low <= high`` (rather than ``low != high``) guarantees termination:
    # once the bounds cross, the range is empty and the value is absent.
    while low <= high:
        mid = (low + high) // 2
        if sequence[mid] > value:
            high = mid - 1
        elif sequence[mid] < value:
            low = mid + 1
        else:
            return mid
    return False


def binary_search(sequence: list[int], value):
    """Find ``value`` in a sorted sequence by iterative bisection, O(log n).

    Algorithm:
        1. Keep two pointers, ``low`` and ``high``, on the first and last
           elements.
        2. Compare the middle element with the target:
           - middle < target: discard the left half (``low`` moves past mid);
           - middle > target: discard the right half (``high`` moves before
             mid).
        3. Repeat until the element is found or the range is empty.

    Args:
        sequence: Values sorted in ascending order; may be empty.
        value: Target to look for.

    Returns:
        The index of a matching element, or ``False`` when not found.
    """
    return _binary_search_range(sequence, value, 0, len(sequence) - 1)


def binary_search_recur(sequence, value, low, high):
    """Recursive bisection over the inclusive index range ``[low, high]``.

    Args:
        sequence: Values sorted in ascending order.
        value: Target to look for.
        low: First index of the range (``0`` for a whole-sequence search).
        high: Last index of the range (``len(sequence) - 1`` for a
            whole-sequence search; ``-1`` is fine for an empty sequence).

    Returns:
        The index of a matching element, or ``False`` when not found.
    """
    # Empty range: the bounds crossed, so the value cannot be present.
    # Without this guard the recursion never stops once ``high`` drops
    # below ``low``.
    if low > high:
        return False

    mid = (low + high) // 2
    if sequence[mid] == value:
        return mid
    if sequence[mid] > value:
        return binary_search_recur(sequence, value, low, mid - 1)
    return binary_search_recur(sequence, value, mid + 1, high)


def interpolation_search(sequence, value):
    """Find ``value`` in a sorted numeric sequence by interpolation search.

    A variation of binary search: instead of always probing the middle, the
    probe position is estimated by linear interpolation.  Treat indexes as
    ``x`` and values as ``y``; with the end points ``(x0, y0)`` and
    ``(x1, y1)`` of the current range, the index for a target ``y`` on the
    straight line through them is::

        x = (y - y0) * (x1 - x0) / (y1 - y0) + x0

    Steps:
        1. Take the first element's index/value as ``(x0, y0)`` and the last
           element's as ``(x1, y1)``.
        2. Use the target as ``y`` and compute ``x`` with the formula above.
        3. If the value at ``x`` is greater than the target it becomes the
           new upper bound, otherwise the new lower bound.
        4. Repeat steps 2 and 3.

    Average cost is O(log log n) for uniformly distributed values and O(n)
    in the worst case.

    Args:
        sequence: Numeric values sorted in ascending order; may be empty.
        value: Target to look for.

    Returns:
        The index of a matching element, or ``False`` when not found.
    """
    low = 0
    high = len(sequence) - 1

    # Requiring the target to lie between the end values keeps the computed
    # probe inside [low, high] and rejects out-of-range targets up front.
    while low <= high and sequence[low] <= value <= sequence[high]:
        span = sequence[high] - sequence[low]
        if span == 0:
            # All values in the range are equal and, by the loop condition,
            # equal to the target; also avoids dividing by zero.
            return low

        # int() keeps the probe usable as an index for float-valued data.
        probe = low + int((value - sequence[low]) * (high - low) // span)
        if sequence[probe] > value:
            high = probe - 1
        elif sequence[probe] < value:
            low = probe + 1
        else:
            return probe
    return False


def exponential_search(sequence: list[int], value):
    """Find ``value`` in a sorted sequence by exponential search.

    Rather than halving or interpolating, the upper bound is estimated with
    an exponential sequence, which finds a bound quickly; this suits sorted
    data whose size is large or unknown.  The values at positions 2^0, 2^1,
    2^2, ..., 2^k are compared with the target to determine the search
    region, which is then searched by bisection.

    Example: looking for 22 in the 15 sorted elements
    ``[2, 3, 4, 6, 7, 8, 10, 13, 15, 19, 20, 22, 23, 24, 28]``.  Position
    2^0 = 1 holds 3 < 22, so continue with 2^1, 2^2, 2^3, whose values 4, 7
    and 15 are all smaller than 22.  The next position, 2^4 = 16, is past
    the end of the data, so the upper bound becomes the last index, 14.

    The lower bound is half of the upper bound: reaching the upper bound
    means the previous probe, at 2^(k-1), held a value smaller than the
    target, so it is a valid lower bound.

    Args:
        sequence: Values sorted in ascending order; may be empty.
        value: Target to look for.

    Returns:
        The index of a matching element, or ``False`` when not found.
    """
    size = len(sequence)
    # The lower bound is half of the upper bound, so ``high`` starts at 1.
    high = 1
    while high < size and sequence[high] < value:
        high <<= 1

    low = high >> 1
    # Bisect in place: no slice copy, and the returned index needs no
    # offset arithmetic (which would mishandle a hit at index 0).
    return _binary_search_range(sequence, value, low, min(high, size - 1))


if __name__ == "__main__":
    # Small manual demo; guarded so importing the module has no side effects.
    sequence = [1, 4, 6, 10, 14, 18, 24, 39, 50]
    num1 = 10
    print(exponential_search(sequence, num1))


# ---------------------------------------------------------------------------
# Tests (``test/test_search.py`` in the original patch).  They sit next to the
# implementation here, so the star-import of ``search.binary_search`` that the
# separate test file used is not needed.
# ---------------------------------------------------------------------------


def test_binary_search():
    sequence = [1, 4, 6, 10, 14, 18, 24, 39, 50]
    num1 = 10
    num2 = 11
    num3 = 50

    assert 3 == binary_search(sequence, num1)
    assert binary_search(sequence, num2) is False
    assert 8 == binary_search(sequence, num3)


def test_binary_search_recur():
    sequence = [1, 4, 6, 10, 14, 18, 24, 39, 50]
    num1 = 10
    num2 = 11
    num3 = 50

    assert 3 == binary_search_recur(sequence, num1, 0, len(sequence) - 1)
    assert binary_search_recur(sequence, num2, 0, len(sequence) - 1) is False
    assert 8 == binary_search_recur(sequence, num3, 0, len(sequence) - 1)


def test_interpolation_search():
    sequence = [1, 4, 6, 10, 14, 18, 24, 39, 50]
    num1 = 10
    num2 = 11
    num3 = 50

    assert 3 == interpolation_search(sequence, num1)
    assert interpolation_search(sequence, num2) is False
    assert 8 == interpolation_search(sequence, num3)


def test_exponential_search():
    sequence = [1, 4, 6, 10, 14, 18, 24, 39, 50]
    num1 = 10
    num2 = 11
    num3 = 50

    assert 3 == exponential_search(sequence, num1)
    assert exponential_search(sequence, num2) is False
    assert 8 == exponential_search(sequence, num3)