\documentclass[11pt]{article}
\usepackage[margin=1in]{geometry}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{lmodern}
\newtheorem{definition}{Definition}
\newtheorem{proposition}{Proposition}
\newtheorem{conjecture}{Conjecture}
\title{Greedy Smooth-Base Construction for Prime Gaps\\
via Sums and Differences of Smooth Numbers}
\author{Praveen Kumar}
\date{May 2026}
\begin{document}
\maketitle
\begin{abstract}
This note studies a greedy construction arising from additive representations by smooth numbers. Starting from the base set $S_1 = \{2,3,5\}$, let $M(S_k)$ denote the multiplicative semigroup of $S_k$-smooth numbers, and let $R(S_k) = \{|A \pm B| : A,B \in M(S_k)\}$ be the associated additive reachable set. At each step, define $P_k$ to be the smallest prime not contained in $R(S_k)$, and update the base by $S_{k+1} = S_k \cup \{P_k\}$. This produces a greedily defined sequence of ``minimal failing primes'', beginning $823, 941, 1193, 1447, 1549, 1951, \dots$, which records the successive obstructions to additive coverage by smooth numbers generated from the current base. The construction is elementary and computational, but it suggests a structured interaction between multiplicative smoothness and additive prime coverage, and raises natural questions about growth, density, and dependence on the initial base.
\end{abstract}
\section{Introduction}
The distribution of prime numbers is often studied through additive representations, in which primes or integers are expressed as sums or differences of structured sets such as powers, smooth numbers, or primes themselves. In this note the starting point is a finite set of primes $S$, from which one forms the multiplicative semigroup $M(S)$ of $S$-smooth numbers and then the additive reachable set $R(S) = \{|A \pm B| : A,B \in M(S)\}$. The basic question is: given an initial base $S$, which primes already lie in $R(S)$, and which primes are missed and therefore act as obstructions to additive coverage by the current smooth basis?
We focus on a simple greedy feedback procedure beginning with $S_1 = \{2,3,5\}$. At each step $k$, the smallest prime $P_k$ that does not lie in $R(S_k)$ is adjoined to the base to form $S_{k+1} = S_k \cup \{P_k\}$. This produces a recursively defined sequence of ``minimal failing primes'' and a growing family of bases $S_k$ whose associated reachable sets cover an increasing proportion of the primes. The goal of this paper is to record the construction, illustrate its behaviour through explicit examples and computations, and formulate some natural questions about the growth and structure of the resulting sequence.
\section{Definitions and Algorithm}
Let $\mathbb{P} = \{2,3,5,7,11,\dots\}$ denote the set of all prime numbers. Fix an initial finite set of primes
\[
S_1 = \{2,3,5\} \subset \mathbb{P}.
\]
Given a finite set of primes $S_k = \{p_1,\dots,p_m\}$, define the associated multiplicative semigroup
\[
M(S_k) = \bigl\{ p_1^{a_1} \cdots p_m^{a_m} : a_1,\dots,a_m \in \mathbb{Z}_{\ge 0} \bigr\}.
\]
Its elements are exactly the positive integers whose prime factors all lie in $S_k$, and will be called $S_k$-smooth numbers.
From $M(S_k)$ we form the additive reachable set
\[
R(S_k) = \bigl\{\, |A \pm B| : A,B \in M(S_k) \bigr\}.
\]
Thus $R(S_k)$ consists of all nonnegative integers that can be obtained as the absolute value of a sum or a difference of two $S_k$-smooth numbers.
\begin{definition}[Greedy smooth--base prime sequence]
For each $k \ge 1$, the next failing prime with respect to $S_k$ is defined by
\[
P_k = \min \bigl\{ p \in \mathbb{P} \setminus \{2\} : p \notin R(S_k) \bigr\}.
\]
The base is then updated by the feedback rule
\[
S_{k+1} = S_k \cup \{P_k\}.
\]
The sequence $(P_k)_{k \ge 1}$ obtained in this way, together with the chain of bases $(S_k)_{k \ge 1}$, is called the greedy smooth--base prime sequence associated with the initial set $S_1 = \{2,3,5\}$.
\end{definition}
\begin{proposition}
Assume that, for each finite base $S_k$, the set of primes not contained in $R(S_k)$ is nonempty. Then the prime $P_k$ exists and is uniquely determined. In particular, under this assumption, the construction of the sequences $(P_k)$ and $(S_k)$ is well defined.
\end{proposition}
\begin{proof}
Fix $k \ge 1$, and assume that the set
\[
\bigl\{ p \in \mathbb{P} \setminus \{2\} : p \notin R(S_k) \bigr\}
\]
is nonempty. Since this is a nonempty subset of the positive integers, it has a least element by the well-ordering principle. That least element is precisely $P_k$, and it is unique by definition. The update rule $S_{k+1} = S_k \cup \{P_k\}$ then determines the next base uniquely.
\end{proof}
\section{First Break and Correction Examples}
In this section some of the first steps of the construction are described explicitly. All computations were carried out using the reference implementation given in the appendix.
\subsection{Successes with the initial base $\{2,3,5\}$}
At the first stage the active base is
\[
S_1 = \{2,3,5\}.
\]
The semigroup $M(S_1)$ consists of all $\{2,3,5\}$-smooth numbers such as
\[
1,2,3,4,5,6,8,9,10,12,15,16,18,20,24, \dots .
\]
Many small primes can be written in the form $|A \pm B|$ with $A,B \in M(S_1)$. For example:
\[
\begin{array}{rcl}
2 &=& 1 + 1, \\
3 &=& 2 + 1, \\
5 &=& 4 + 1, \\
7 &=& 6 + 1, \\
11 &=& 10 + 1, \\
13 &=& 12 + 1, \\
17 &=& 16 + 1, \\
811 &=& 810 + 1.
\end{array}
\]
Here all the summands are $\{2,3,5\}$-smooth (for instance $810 = 2 \cdot 3^4 \cdot 5$). Computations indicate that with this base every prime $p$ below the first break can be expressed in this way.
\subsection{First break at $P_1 = 823$}
The first break occurs at the prime $823$. With the base $S_1 = \{2,3,5\}$ there are no $\{2,3,5\}$-smooth numbers $A$ and $B$ such that $|A \pm B| = 823$. In other words, $823$ is the smallest prime that does not belong to the reachable set $R(S_1)$. By definition of the greedy sequence,
\[
P_1 = 823
\]
and the base is enlarged to
\[
S_2 = S_1 \cup \{823\} = \{2,3,5,823\}.
\]
\subsection{Correction after adjoining $823$ and the second break}
Once $823$ is adjoined to the base, the new semigroup $M(S_2)$ includes products involving $823$ together with $2,3,5$, such as
\[
823,\ 2 \cdot 823,\ 3 \cdot 823,\ 5 \cdot 823,\ 823^2,\ 2 \cdot 3 \cdot 823, \dots .
\]
The corresponding reachable set $R(S_2)$ strictly contains $R(S_1)$. In particular several nearby primes that were previously unreachable now admit simple representations; for instance
\[
821 = 823 - 2,\qquad
823 = 1646 - 823,\qquad
827 = 823 + 4,\qquad
829 = 823 + 6,
\]
where $2,4$ and $6$ are all $\{2,3,5\}$-smooth.
By direct computation one finds that, with the enlarged base $S_2$, every prime $p < 941$ lies in $R(S_2)$, while
\[
941 \notin R(S_2).
\]
Thus the second break occurs at
\[
P_2 = 941,
\]
and the base is updated again to
\[
S_3 = S_2 \cup \{941\} = \{2,3,5,823,941\}.
\]
Further iterations proceed analogously: at each step $k$ the prime $P_k$ is the smallest prime not in $R(S_k)$, and adjoining $P_k$ to the base typically restores coverage up to a new, higher threshold before the next break occurs.
\section{Computational Results}
This section summarizes the numerical exploration of the greedy smooth--base prime sequence. In all computations a finite search bound $X$ was imposed, and only smooth numbers $A,B$ with $|A \pm B| \le X$ were used when constructing $R(S_k)$. As a result, the later terms of the sequence depend on the chosen bound, while the earliest terms appear stable.
\subsection{First terms for a bound of $10^4$}
With a search bound of $X = 10^4$ for the reachable set $R(S_k)$, starting from $S_1 = \{2,3,5\}$ and applying the greedy rule for fifteen steps yields the sequence
\[
P_1,\dots,P_{15} =
823,\ 941,\ 1193,\ 1447,\ 1549,\ 1951,\ 2539,\ 2677,\ 2711,\ 2753,\ 3257,\ 3313,\ 4783,\ 4909,\ 4987.
\]
These primes are, relative to this truncation, the smallest primes not in $R(S_k)$ at each respective stage $k$.
\subsection{First terms for a bound of $10^5$}
To test the robustness of these values, the computation was repeated with a larger search bound of $X = 10^5$ for $R(S_k)$ and primes tested up to $10^5$. In this regime the first six terms of the sequence remained unchanged,
\[
P_1,\dots,P_6 = 823,\ 941,\ 1193,\ 1447,\ 1549,\ 1951,
\]
but the later breaks shifted to
\[
P_7,\dots,P_{15} =
2609,\ 2713,\ 3797,\ 4457,\ 4987,\ 5231,\ 6229,\ 6491,\ 6637.
\]
Thus the segment $(P_1,\dots,P_6)$ appears stable across the two bounds considered, while the location of later breaks is sensitive to the truncation level. Preliminary experiments with still larger bounds (up to $X = 10^6$) are consistent with this picture, but become computationally expensive and are not reported in detail here.
\subsection{Coverage of primes up to a bound}
For each base $S_k$ one can ask what proportion of primes up to a fixed bound $X$ lie in the reachable set $R(S_k)$. For $X = 5000$ the following approximate coverage was observed:
\[
\begin{array}{lcl}
S_1 = \{2,3,5\}: &\text{about}& 61\% \text{ of primes $\le 5000$ in $R(S_1)$},\\
S_2 = S_1 \cup \{P_1\}: &\text{about}& 72\% \text{ covered},\\
S_4 = S_1 \cup \{P_1,P_2,P_3\}: &\text{about}& 86\% \text{ covered},\\
S_{11} = S_1 \cup \{P_1,\dots,P_{10}\}: &\text{about}& 95\% \text{ covered},\\
S_{16} = S_1 \cup \{P_1,\dots,P_{15}\}: &\text{about}& 97\% \text{ covered}.
\end{array}
\]
Thus a relatively small number of added primes suffices to obtain high coverage of primes up to this bound.
\subsection{Conjectural growth of the sequence}
The current computations are too limited to suggest a precise asymptotic formula for $P_k$, but some qualitative behaviour can be recorded. In the range explored numerically, the ratios $P_{k+1}/P_k$ for the stable initial segment remain moderate, and there is no clear indication of extremely rapid growth. It is natural to ask whether there exist constants $c_1,c_2>0$ and exponents $\alpha_1,\alpha_2$ such that
\[
c_1 k^{\alpha_1} \le P_k \le c_2 k^{\alpha_2}
\]
for all sufficiently large $k$, or even whether $P_k$ grows on a polynomial scale in $k$. At present this remains entirely conjectural, and the available data are too sparse to support more than a qualitative guess.
\begin{conjecture}
The greedy smooth--base prime sequence $(P_k)$ is infinite, and $P_k$ grows at most polynomially in $k$.
\end{conjecture}
\section{Discussion and Open Questions}
The greedy smooth--base construction is not intended as an efficient algorithm for generating primes. Classical sieves and modern probabilistic primality tests are far superior for that purpose. Instead, the present construction should be viewed as a model that highlights which primes are ``structurally necessary'' in order for sums and differences of smooth numbers to cover most primes.
Several natural questions arise:
\begin{itemize}
\item Is the sequence $(P_k)$ infinite? Equivalently, does every finite base $S_k$ miss at least one prime in its reachable set $R(S_k)$?
\item How quickly does $P_k$ grow as a function of $k$? For example, does $P_k$ grow polynomially, exponentially, or in some intermediate fashion?
\item For a fixed bound $X$, how does the proportion of primes $\le X$ that lie in $R(S_k)$ behave as $k$ increases? Does this proportion tend to $1$ as $k \to \infty$ for each fixed $X$?
\item How sensitive is the construction to the initial base $S_1$? For instance, what changes if one starts from $\{2,3,7\}$ or from a larger set of small primes?
\end{itemize}
These questions place the computation in the context of additive bases, smooth numbers, and $S$-unit type phenomena, and suggest directions for both theoretical and further experimental study.
\appendix
\section{Python Reference Implementation}
For completeness, a simple Python implementation of the greedy smooth--base construction is provided. The parameters \texttt{prime\_limit} and \texttt{max\_val} control, respectively, the range of primes searched for breaks and the truncation level $X$ used when forming the reachable set $R(S_k)$. Small-scale experiments (for example $X = 10^4$) run very quickly, while larger bounds (such as $X = 10^5$ or $10^6$) become progressively more expensive.
\begin{verbatim}
import time
def is_prime(n: int) -> bool:
"""Return True if n is prime, False otherwise."""
if n < 2:
return False
if n == 2:
return True
if n % 2 == 0:
return False
k = 3
while k * k <= n:
if n % k == 0:
return False
k += 2
return True
def build_primes(limit: int):
"""Precompute primes up to 'limit' using trial division."""
primes = []
for n in range(2, limit + 1):
if is_prime(n):
primes.append(n)
return primes
def get_reachable_set(base_primes, max_val):
"""
Compute the set of integers <= max_val reachable as |A ± B|
where A and B are base_primes-smooth numbers.
"""
smooth = {1}
for p in base_primes:
new_smooth = set(smooth)
for s in smooth:
x = s * p
while x <= max_val:
new_smooth.add(x)
x *= p
smooth = new_smooth
smooth_list = sorted(smooth)
n = len(smooth_list)
reachable = set()
for i in range(n):
a = smooth_list[i]
for j in range(i, n):
b = smooth_list[j]
s = a + b
if s <= max_val:
reachable.add(s)
d = b - a
if d <= max_val:
reachable.add(d)
return reachable
def greedy_smooth_base_sequence(num_terms=15,
prime_limit=100000,
max_val=100000):
"""
Generate the greedy smooth-base prime sequence starting from {2,3,5}.
prime_limit : largest prime to search over
max_val : upper bound for |A ± B| and for smooth numbers
"""
print(f"Building primes up to {prime_limit} ...")
t0 = time.time()
primes_list = build_primes(prime_limit)
print(f" Found {len(primes_list)} primes in {time.time() - t0:.2f} s\n")
base = [2, 3, 5]
seq = []
for k in range(num_terms):
print(f"=== Iteration {k+1} ===")
t1 = time.time()
reachable = get_reachable_set(base, max_val)
print(f" Reachable set size: {len(reachable)} "
f"(computed in {time.time() - t1:.2f} s)")
for p in primes_list:
if p not in reachable:
seq.append(p)
base.append(p)
print(f" Next failing prime P_{k+1} = {p}")
break
print(f" Current base size: {len(base)}\n")
return seq, base
if __name__ == "__main__":
sequence, final_base = greedy_smooth_base_sequence(
num_terms=15,
prime_limit=100000,
max_val=100000
)
print("Greedy smooth-base sequence (first terms):")
print(sequence)
\end{verbatim}
\end{document}