Lesson 11 sieve of eratosthenes

jsueling · jsueling · commit c4a803d694f1 · 2025-12-07T17:59:30.000Z
diff --git a/11_count_non_divisible.py b/11_count_non_divisible.py
@@ -0,0 +1,75 @@
+"""https://app.codility.com/programmers/lessons/11-sieve_of_eratosthenes/count_non_divisible/"""
+
+from math import sqrt, floor
+from collections import Counter
+
+# Time Complexity:
+# Precompute frequency count of each element in A = O(N)
+# Each element in A in range [1, 100_000], let M be the max element in A
+# Outer loop: iterate over the distinct elements of A, at most M of them = O(M)
+# Inner loop: for each element iterate over possible divisors = O(sqrt(M))
+# Return result for each element in A = O(N)
+# Overall: O(2 * N + M * sqrt(M)) = O(N + M * sqrt(M))
+
+def solution_a(a: list[int]) -> list[int]:
+    """
+    For each element in array A, count the number of elements
+    of A (including self) that don't divide the element.
+    
+    First attempt: Directly count divisors for each unique element.
+    Codility score: 88%, failing performance tests
+    """
+
+    count_lookup = Counter(a)
+    non_divisible_count = Counter()
+    n = len(a)
+    for element in count_lookup:
+        divisor_count = 0
+        for divisor in range(1, floor(sqrt(element)) + 1):
+            q, r =  divmod(element, divisor)
+            if r == 0:
+                divisor_count += count_lookup[divisor]
+                if q != divisor:
+                    divisor_count += count_lookup[q]
+        # Non-divisors can be found implicitly through divisors
+        num_non_divisor = n - divisor_count
+        non_divisible_count[element] = num_non_divisor
+
+    return [non_divisible_count[a[i]] for i in range(n)]
+
+# Time Complexity:
+# Precompute frequency count of each element in A = O(N)
+# Each element in A in range [1, 100_000], let M be the max element in A
+# Outer loop: iterate over M counts = O(M)
+
+# Inner loop: when the divisor is 1 it does M/1 work, when the divisor is 2 it does M/2 work, etc.
+# Across all M iterations, work done is:
+# M/1 + M/2 + M/3 + ... + M/M = M * (1 + 1/2 + 1/3 + ... + 1/M).
+# The bracketed terms are a partial sum of the harmonic series, which is O(log M),
+# so the inner loop does total work = O(M log M).
+# Overall: O(N + M + M log M) = O(N + M log M)
+
+def solution_b(a: list[int]) -> list[int]:
+    """Optimised solution using the sieve of eratosthenes."""
+    if not a:
+        return []
+
+    n = len(a)
+    m = max(a)
+
+    # Freq count of each number in A
+    counts = [0] * (m + 1)
+    for num in a:
+        counts[num] += 1
+
+    # Number of divisors for each number in A
+    divisor_counts = [0] * (m + 1)
+
+    for divisor in range(1, m + 1):
+        if counts[divisor] > 0:
+            k = divisor
+            while k <= m:
+                divisor_counts[k] += counts[divisor]
+                k += divisor
+
+    return [n - divisor_counts[num] for num in a]
diff --git a/11_count_semiprimes.py b/11_count_semiprimes.py
@@ -0,0 +1,94 @@
+"""https://app.codility.com/programmers/lessons/11-sieve_of_eratosthenes/count_semiprimes/"""
+
+from math import floor, sqrt
+
+# Time Complexity:
+# Sieve of Eratosthenes to find all primes up to N = O(Nlog(logN))
+# For each number up to N, check all possible divisors = O(N*sqrt(N))
+# For M queries, return result in O(M)
+# Overall: O(Nlog(logN) + Nsqrt(N) + M) = O(Nsqrt(N) + M)
+
+def solution_a(n: int, p: list[int], q: list[int]) -> list[int]:
+    """
+    P, Q of lengths M represent M queries.
+    For each query, find the number of semiprimes within the inclusive range [P[K], Q[K]].
+    A semiprime is the product of two prime numbers.
+    E.g. 4, 6, 9, 10, 14 .. are semiprimes
+    N is the maximum value in P or Q.
+    Compute and return the queries
+
+    This solution passes 100% on Codility tests. The detected TC is O(N * log(log(N)) + M),
+    which is not the TC we expected: O(Nsqrt(N) + M)
+    """
+
+    # sieve[i] is True if i is prime
+    sieve = [True] * (n + 1)
+    sieve[0] = sieve[1] = False
+
+    # Multiples of the form k * i where k < i are already marked by smaller primes.
+    # E.g. for i = 5, multiples 5*2=10, 5*3=15, 5*4=20 are already marked by 2 and 3.
+    # Therefore, start from i*i
+
+    i = 2
+    while i * i <= n:
+        if sieve[i]:
+            k = i * i
+            while k <= n:
+                sieve[k] = False
+                k += i
+        i += 1
+
+    semiprime_count = 0
+    # prefix[i] is the number of semiprimes up to and including number i
+    prefix = []
+    # For each number check all possible divisors
+    for num in range(n+1):
+        for divisor in range(2, floor(sqrt(num)) + 1):
+            # Definition of semiprime: divisor and quotient are prime
+            if sieve[divisor] and num % divisor == 0 and sieve[num // divisor]:
+                semiprime_count += 1
+        prefix.append(semiprime_count)
+    return [prefix[q[i]] - prefix[p[i]-1] for i in range(len(p))]
+
+# Time Complexity:
+# Instantiating SPF (smallest prime factor) sieve over [0..N] = O(Nlog(logN))
+# Create prefix sum array = O(N)
+# Fill prefix sum array with O(1) work per element using SPF = O(N)
+# Do M queries, with O(1) work per element using prefix sum array = O(M)
+# Overall: O(Nlog(logN) + 2 * N + M) = O(Nlog(logN) + M)
+
+def solution_b(n: int, p: list[int], q: list[int]) -> list[int]:
+    """Optimised solution generated by gemini-3-pro (explained in comments)"""
+
+    # spf[i] is the smallest prime factor for number i
+    # If spf[i] == 0, then i is prime
+    spf = [0] * (n+1)
+    i = 2
+    while i * i <= n:
+        # Sieve if i is prime, recording the smallest prime factor
+        if spf[i] == 0:
+            k = i * i
+            while k <= n:
+                if spf[k] == 0:
+                    spf[k] = i
+                k += i
+        i += 1
+
+    # prefix[i] is the number of semiprimes up to and including number i
+    prefix = [0] * (n + 1)
+    for num in range(2, n + 1):
+        # Composite number (candidate for semiprime)
+        if spf[num] != 0:
+            smallest_prime_factor = spf[num]
+            complement = num // smallest_prime_factor
+            if spf[complement] == 0:
+                prefix[num] += 1
+        prefix[num] += prefix[num - 1]
+
+    # Why dividing by SPF can verify semiprimes correctly:
+    # 1. If N is prime (single prime factor), we don't enter the conditional (... if spf[num] != 0:)
+    # 2. If N is semiprime (2 prime factors), dividing by SPF[N] returns the other prime factor
+    # 3. If N has more than 2 prime factors (every other number), dividing by SPF[N]
+    #    results in another composite number (not prime)
+
+    return [prefix[q[i]] - prefix[p[i]-1] for i in range(len(p))]