From 5c43ffb6020b66a44be7c0ba1b6226581c701bbb Mon Sep 17 00:00:00 2001
From: Danila Fedorin
Date: Thu, 26 Sep 2019 23:30:53 -0700
Subject: [PATCH] Initial solution to homework

---
 qselect.py | 15 ++++++++++++++
 qsort.py   | 29 +++++++++++++++++++++++++++
 report.txt | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+)
 create mode 100644 qselect.py
 create mode 100644 qsort.py
 create mode 100644 report.txt

diff --git a/qselect.py b/qselect.py
new file mode 100644
index 0000000..fdb3abe
--- /dev/null
+++ b/qselect.py
@@ -0,0 +1,15 @@
+import random
+
+def qselect(i, xs):
+    if xs == []: return None
+
+    pivot = xs.pop(random.randrange(len(xs)))
+    left = [x for x in xs if x < pivot]
+    right = [x for x in xs if x >= pivot]
+
+    if i > len(left):
+        return qselect(i - len(left) - 1, right)
+    elif i == len(left):
+        return pivot
+    else:
+        return qselect(i, left)
diff --git a/qsort.py b/qsort.py
new file mode 100644
index 0000000..b09a5ac
--- /dev/null
+++ b/qsort.py
@@ -0,0 +1,29 @@
+def qsort(xs):
+    if xs == []: return []
+
+    pivot = xs[0]
+    left = [x for x in xs if x < pivot]
+    right = [x for x in xs[1:] if x >= pivot]
+    return [qsort(left), pivot, qsort(right)]
+
+def sorted(tree):
+    if tree == []: return []
+    return sorted(tree[0]) + [tree[1]] + sorted(tree[2])
+
+def search(tree, x):
+    return _search(tree, x) != []
+
+def insert(tree, x):
+    node = _search(tree, x)
+    if node == []:
+        node.append([])
+        node.append(x)
+        node.append([])
+
+def _search(tree, x):
+    if tree == []: return tree
+
+    pivot = tree[1]
+    if pivot == x: return tree
+    elif x < pivot: return _search(tree[0], x)
+    else: return _search(tree[2], x)
diff --git a/report.txt b/report.txt
new file mode 100644
index 0000000..4296844
--- /dev/null
+++ b/report.txt
@@ -0,0 +1,59 @@
+Q: What are the best-case, worst-case, and average-case time complexities of
+   quicksort? Briefly explain each case.
+A: Quicksort has a worst-case complexity of O(n^2). This is because in the
+   worst case, it has to iterate over n, n-1, n-2, ..., 1 items. If the pivot
+   is not picked randomly, this is guaranteed to occur when the list is
+   sorted in either direction. If the pivot is picked randomly, there is
+   still a chance that the pivot will be either the largest or the smallest
+   element of the subarray in question.
+
+   The best-case complexity is O(n*log(n)), because each recursive call cuts
+   the size of the input in half. Since the total number of items sorted at
+   a particular depth is always n, and the depth is logarithmically related
+   to the number of items, the complexity is O(n*log(n)).
+
+   On average, quicksort is also O(n*log(n)): a randomly chosen pivot is
+   unlikely to be the smallest or the largest element often enough to
+   degenerate into the worst case. I am unfamiliar with the proof techniques
+   that formalize this.
+
+Q: What are the best-case, worst-case, and average-case time complexities of
+   qselect? Briefly explain.
+A: For the same reason as quicksort, the worst-case complexity is O(n^2): if
+   the algorithm consistently places all the elements to one side of the
+   pivot, it has to keep scanning n, n-1, n-2, ..., 1 items.
+
+   In the best case, the input size is halved at each step. This means that
+   first n numbers are processed, then n/2, then n/4, and so on. We can
+   write this as n*(1 + 1/2 + 1/4 + ...). The formula for the sum of the
+   first k terms of a geometric series with first term n and ratio r is
+   n*(1 - r^k)/(1 - r), which for r = 1/2 simplifies to 2n*(1 - (1/2)^k).
+   Since 1 - (1/2)^k < 1, we get n*(1 + 1/2 + 1/4 + ...) < 2n. This means
+   the complexity is O(n).
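+
+   As a concrete check of this bound: for n = 16, the total work is
+   16 + 8 + 4 + 2 + 1 = 31, which is indeed less than 2n = 32.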
+
+   For similarly hand-wavy reasons to those in the first question, the
+   average-case complexity matches the best-case complexity, O(n), rather
+   than the worst-case complexity.
+
+Q: What are the time complexities for the operations implemented?
+A: The complexity of sorted is O(n*log(n)) in the best case and O(n^2) in
+   the worst case. This is because of the way it "flattens" the binary
+   search tree: it recursively calls itself, building a new list from the
+   results of the two recursive calls and the "pivot" between them, and
+   creating a new list from lists of length m and n is an O(m+n) operation.
+   Just like with qsort, in the best case the tree is balanced with a depth
+   of log(n); since the concatenations at each level take about n steps in
+   total, the best-case complexity is O(n*log(n)). On the other hand, for a
+   tree with only right children, the concatenations take 1+2+...+n steps,
+   which is O(n^2).
+
+   Since insert and search both use _search and perform no steps beyond
+   O(1) on top of it, they have the same complexity as _search. _search
+   itself is O(log(n)) in the average case and O(n) in the worst case, so
+   the same is true for the other two operations. These bounds hold because
+   _search is a simple binary search over the tree.
+
+Debriefing:
+1. Approximately how many hours did you spend on this assignment?
+   ~1 hour.
+2. Would you rate it as easy, moderate, or difficult?
+   Very easy, especially since it followed from the slides on day 1.
+3. Did you work on it mostly alone, or mostly with other people?
+   Just me, myself, and I.
+4. How deeply do you feel you understand the material it covers (0%–100%)?
+   80%. Determining the actual complexities of recursive functions has not
+   yet been taught in class, and I haven't consulted the books. For the
+   best and worst cases, though, it's pretty simple.
+5. Any other comments?
+   I'd prefer the code for "broken qsort" to be available on Canvas. Maybe
+   I missed it :)
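
For reference, a minimal sanity check of the two modules above (not part of
the submission itself; it assumes qselect.py and qsort.py are on the Python
path, and relies on qselect being 0-indexed, as the code implies):

    from qselect import qselect
    from qsort import qsort, sorted, search, insert

    # qselect(i, xs) returns the i-th smallest element of xs, 0-indexed.
    # It mutates xs (the pivot is popped), so pass a copy if the original
    # list is still needed.
    print(qselect(2, [3, 10, 4, 7, 19]))   # 7, the third-smallest element

    # qsort returns a binary search tree encoded as nested lists:
    # [left_subtree, pivot, right_subtree], with [] as the empty tree.
    tree = qsort([4, 2, 6, 1])
    print(tree)              # [[[[], 1, []], 2, []], 4, [[], 6, []]]
    print(sorted(tree))      # [1, 2, 4, 6]

    # search and insert work on the same representation; insert modifies
    # the tree in place by appending to the empty leaf that _search finds.
    print(search(tree, 6))   # True
    insert(tree, 5)
    print(sorted(tree))      # [1, 2, 4, 5, 6]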