summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrett Weiland <brett_weiland@gmail.com>2024-04-12 16:23:21 -0500
committerBrett Weiland <brett_weiland@gmail.com>2024-04-12 16:23:21 -0500
commit1f37ade7b2f97d96855316248724d791eec28ad1 (patch)
treeb847358830a66c5748164e41f4e227f2738ccd69
parentb0b2eb6ff181ae84d7a83807f47c3cec25451969 (diff)
boutta paste spellchecked ver
-rw-r--r--report/report.aux10
-rw-r--r--report/report.log17
-rw-r--r--report/report.pdfbin233976 -> 232746 bytes
-rw-r--r--report/report.tex68
4 files changed, 51 insertions, 44 deletions
diff --git a/report/report.aux b/report/report.aux
index 452923f..ad7283b 100644
--- a/report/report.aux
+++ b/report/report.aux
@@ -1,12 +1,12 @@
\relax
-\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Add and Subtract operations of various Operand Lengths}}{4}{}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Iteration count of various operand lengths.}}{4}{}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{igraph}{{1}{4}{}{}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Add and Subtract operations of various Operand Lengths}}{5}{}\protected@file@percent }
-\newlabel{pgraph}{{2}{5}{}{}{}}
-\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Number of additions and subtractions for various inputs}}{6}{}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Add and Subtract operations of various operand lengths.}}{4}{}\protected@file@percent }
+\newlabel{pgraph}{{2}{4}{}{}{}}
+\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Number of additions and subtractions for various inputs.}}{6}{}\protected@file@percent }
\newlabel{speed_table}{{1}{6}{}{}{}}
-\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Results of multiplication according to simulated multipliers}}{6}{}\protected@file@percent }
+\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Results of multiplication according to simulated multipliers.}}{6}{}\protected@file@percent }
\newlabel{result_table}{{2}{6}{}{}{}}
\@writefile{lol}{\contentsline {lstlisting}{../booth\textunderscore multiplier.py}{7}{}\protected@file@percent }
\gdef \LT@i {\LT@entry
diff --git a/report/report.log b/report/report.log
index c65e108..2484a5d 100644
--- a/report/report.log
+++ b/report/report.log
@@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024/Arch Linux) (preloaded format=pdflatex 2024.4.11) 12 APR 2024 15:40
+This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024/Arch Linux) (preloaded format=pdflatex 2024.4.11) 12 APR 2024 16:13
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
@@ -624,17 +624,22 @@ LaTeX Font Info: External font `cmex10' loaded for size
{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}{/usr/share/texmf-dist/fonts
/enc/dvips/base/8r.enc}{/usr/share/texmf-dist/fonts/enc/dvips/tex-gyre/q-rm.enc
}]
-Underfull \hbox (badness 10000) in paragraph at lines 62--63
+Underfull \hbox (badness 10000) in paragraph at lines 92--93
[]
[2] (./iterations.pgf
LaTeX Font Info: External font `cmex10' loaded for size
(Font) <5> on input line 90.
-) [3] (./performance.pgf) [4] [5] (./speed_table.tex)
-(./result_table.tex) [6] (/usr/share/texmf-dist/tex/latex/listings/lstlang1.sty
+)
+
+LaTeX Warning: `h' float specifier changed to `ht'.
+
+(./performance.pgf) [3] [4] [5] (./speed_table.tex) (./result_table.tex)
+[6] (/usr/share/texmf-dist/tex/latex/listings/lstlang1.sty
File: lstlang1.sty 2024/02/21 1.10 listings language file
-) (../booth_multiplier.py [7] [8] [9]) (./debug_table.tex
+)
+(../booth_multiplier.py [7] [8] [9]) (./debug_table.tex
Overfull \hbox (47.28491pt too wide) in alignment at lines 1--5
[] [] [] [] [] [] [] []
[]
@@ -666,7 +671,7 @@ tt12.pfb></usr/share/texmf-dist/fonts/type1/public/tex-gyre/qplb.pfb></usr/shar
e/texmf-dist/fonts/type1/public/tex-gyre/qplr.pfb></usr/share/texmf-dist/fonts/
type1/public/tex-gyre/qplri.pfb></usr/share/texmf-dist/fonts/type1/urw/helvetic
/uhvr8a.pfb>
-Output written on report.pdf (11 pages, 233976 bytes).
+Output written on report.pdf (11 pages, 232746 bytes).
PDF statistics:
90 PDF objects out of 1000 (max. 8388607)
58 compressed objects within 1 object stream
diff --git a/report/report.pdf b/report/report.pdf
index f0ce600..9944465 100644
--- a/report/report.pdf
+++ b/report/report.pdf
Binary files differ
diff --git a/report/report.tex b/report/report.tex
index 23fc494..5699bab 100644
--- a/report/report.tex
+++ b/report/report.tex
@@ -40,12 +40,12 @@ Analyzing Performance of Booth’s Algorithm and Modified Booth’s Algorithm}
\begin{document}
\maketitle
\begin{abstract}
-In this paper, the performance of Booth’s Algorithm is compared to modified Booth's Algorithm. Each multiplier is simulated in Python, and performance is observed by counting the number of add and subtract operations for inputs of various lengths. Results are analyzed and discussed to highlight the potential tradeoffs one should consider when deciding what multiplier is to be used.
+In this paper, the performance of Booth’s algorithm is compared to modified Booth's algorithm. Each multiplier is simulated in Python. The multipliers are benchmarked by counting the number of add and subtract operations for inputs of various lengths. Results are analyzed and discussed to highlight the potential tradeoffs one should consider when deciding what multiplier is to be used.
\end{abstract}
\section*{Introduction}
Multiplication is among the most time-consuming mathematical operations for processors. In many applications, the time it takes to multiply dramatically influences the speed of the program. Applications of digital signal processing (such as audio modification and image processing) require constant multiply and accumulate operations for functions such as fast Fourier transforms and convolutions. Other applications are heavily dependent on multiplying large matrices, such as machine learning, 3D graphics and data analysis. In such scenarios, the speed of multiplication is vital. Consequently, most modern processors implement hardware multiplication. However, not all hardware multiplication schemes are equal; there is often a stark contrast between performance and hardware complexity. To further complicate things, multiplication circuits perform differently depending on what numbers are being multiplied.
\section*{Algorithm Description}
-Booth's algorithim computes the product of two signed numbers in two's compliment format. To avoid overflow, the result is placed into a register two times the size of the operands (or two registers the size of a single operand). Additionally, the algorithim must work with a space that is exended one bit more then the result. For the purpose of brevity, the result register and extra bit will be refered to as the workspace, as the algorithim uses this space for its computations. First, the multiplier is placed into the workspace and shifted left by 1. From there, the multiplier is used to either add or subtract from the upper half of the workspace. The specific action is dependent on the last two bits of the workspace.
+Booth's algorithm computes the product of two signed numbers in two's complement format. To avoid overflow, the result is placed into a register two times the size of the operands (or two registers the size of a single operand). Additionally, the algorithm must work with a space that is extended one bit more than the result. For the purpose of brevity, the result register and extra bit will be referred to as the workspace, as the algorithm uses this space for its computations. First, the multiplier is placed into the workspace and shifted left by 1. From there, the multiplicand is either added to or subtracted from the upper half of the workspace. The specific action is dependent on the last two bits of the workspace.
\begin{table}[H]
\centering
\begin{tabular}{lll}
@@ -59,20 +59,8 @@ Bit 1 & Bit 0 & Action \\
\bottomrule
\end{tabular}
\end{table}
-After all iterations are complete, the result is arithmaticlly shifted once to the left, and the process repeats for the number of bits in an operand. The pseudo code for this algorithim is below:\\
- \begin{verbatim}
-Booth:
- result = multiplier << 1
- loop (operand length) times:
- if last two bits are 01:
- result(upper half) += multiplicand
- if last two bits are 10:
- result(upper half) += twos_comp(multiplicand)
- remove extra bits from result
- arithmatic shift result right
-result >> 1
- \end{verbatim}
-
+After each iteration, the result is arithmetically shifted once to the right, and the process repeats for the number of bits in an operand.
+\par
Modified Booth's algorithm functions similarly to Booth's algorithm, but checks the last \textit{three} bits instead. As such, there is a larger selection of actions for each iteration:
\begin{table}[H]
\centering
@@ -91,8 +79,29 @@ Bit 2 & Bit 1 & Bit 0 & Action \\
\bottomrule
\end{tabular}
\end{table}
-Because some operations require doubling the multiplicand, an extra bit is added to the most significant side of the workspace to avoid overflow. After each iteration, the result is arithmaticlly shifted right twice. The number of iterations is only half of the length of the operands. After all iterations, the workspace is shifted right once, and the second most significant bit is set to the first most significant bit as the result register does not include the extra bit.
-Pseudo code for this algorithim is listed below:
+Because some operations require doubling the multiplicand, an additional extra bit is added to the most significant side of the workspace to avoid overflow. After each iteration, the result is arithmetically shifted right twice. The number of iterations is only half of the length of the operands. After all iterations, the workspace is shifted right once, and the second most significant bit is set to the first most significant bit as the result register does not include the extra bit.
+
+\par
+\section*{Simulation Implementation}
+Both algorithms were simulated in Python in an attempt to utilize its high-level nature for rapid development. The table for Booth's algorithm was implemented with a simple if-then, while a switch case was used in modified Booth's algorithm. Simple integers were used to represent registers.
+\par
+One objective of this paper is to analyze and compare the performance of these two algorithms for various operand lengths. As such, the length of operands had to be constantly accounted for. Arithmetic and bitwise operations, including finding the two's complement, were all implemented using functions that took length as an input. Furthermore, extra bits were cleared after each iteration.
+\par
+To track down issues and test the validity of the multipliers, a debug function was written. To allow Python to natively work with the operands, each value is calculated from its two's complement format. The converted numbers are then multiplied, and the result is used to verify both Booth's algorithm and modified Booth's algorithm. To ensure that the debugging function itself doesn't malfunction, all converted operands and expected results are put into a single large table for checking. The exported version of this table can be seen on the last page in Table~\ref{debug_table}. % TODO
+
+The pseudocode below illustrates how each algorithm was implemented in software. For the full code, refer to the listing at the end of the document.
+\begin{verbatim}
+Booth:
+ result = multiplier << 1
+ loop (operand length) times:
+ if last two bits are 01:
+ result(upper half) += multiplicand
+ if last two bits are 10:
+ result(upper half) += twos_comp(multiplicand)
+ remove extra bits from result
+ arithmetic shift result right
+result >> 1
+\end{verbatim}
\begin{verbatim}
Modified booth:
multiplicand(MSB) = multiplicand(second MSB)
@@ -113,32 +122,25 @@ Modified booth:
result(MSB) = 0
\end{verbatim}
-\par
-\section*{Simulation Implimentation}
-Both algorithims were simulated in Python in attempts to utalize its high level nature for rapid development. The table for Booth's algorithim was preformed with a simple if-then loop, while a switch case was used in modified booth's algorithim. Simple integers were used to represent registers.
-\par
-One objective of this paper is to analyze and compare the peformance of these two algorithms for various operand lengths. As such, the length of operands had to be constantly accounted for. Aritmatic bitwise operations, including finding two's compliment, were all implimented using functions that took length as an input. Further more, extra bits were cleared after each iteration.
-\par
-To track down issues and test the validity of the multipliers, a debug function was written. To allow Python to natively work with the operands, each value is calculated from its two's compliment format. The converted numbers are then multiplied, and the result is compared to both Booth's Algorithim and Modified Booth's Algorithim. To ensure that the debugging function itself doesn't malfunction, all converted operands and expected results are put into a single large table for checking. The exported version of this table can be seen on the last page, in table \ref{debug_table}. % TODO
\section*{Analysis}
-Modified Booth's algorithim only requires half the iterations as Booth's algorithim. As such, it can be expected that the benifit of modified Booth's algorithim increases two fold with bit length. This can be shown by comparing the two curves in figure \ref{igraph}.
-\begin{figure}[H]
+Modified Booth's algorithm only requires half the iterations of Booth's algorithm. As such, it can be expected that the benefit of modified Booth's algorithm increases twofold with bit length. This can be shown by comparing the two curves in Figure~\ref{igraph}.
+\begin{figure}[h]
\centering
\input{iterations.pgf}\\
- \captionof{figure}{Add and Subtract operations of various Operand Lengths}
+ \captionof{figure}{Iteration count of various operand lengths.}
\label{igraph}
\end{figure}
\par
-Despite this, the nature of both algorithims dictate that modified booth's algorithim is not explicitly faster. Iteration count translates to the \textit{maxiumum} number of additions and subtractions. Figure \ref{pgraph} shows the performance of the two algorithims given different input lengths, while table x shows the actual data made to generate the plot. There are some interesting things to note. When operands contain repeating zeros or ones, both operations preform similarly, as only shifting is required. Operands containing entirely ones or zeros result in idential preformance. On the contrary, alternating bits within operands demonstrate where the two algorithims differ, as almost no bits can be skipped over. Operands made entirely of alternating bits result in the maximum performance diffrence, in which modified booth's algorithim is potentially two times faster.
+Despite this, the nature of both algorithms dictates that modified Booth's algorithm is not always faster. Iteration count translates to the \textit{maximum} number of additions and subtractions. Figure~\ref{pgraph} shows the performance of the two algorithms given different input lengths, while Table~\ref{speed_table} shows the actual data used to generate the plot. There are some interesting things to note. When operands contain repeating zeros or ones, both algorithms perform similarly, as only shifting is required. Operands containing entirely ones or zeros result in identical performance. On the contrary, alternating bits within operands demonstrate where the two algorithms differ, as almost no bits can be skipped over. Operands made entirely of alternating bits result in the maximum performance difference, in which modified Booth's algorithm is up to two times faster.
\begin{figure}[H]
\centering
\input{performance.pgf}\\
- \captionof{figure}{Add and Subtract operations of various Operand Lengths}
+ \captionof{figure}{Add and Subtract operations of various operand lengths.}
\label{pgraph}
\end{figure}
\par
-All of this needs to be considered when designing an ALU. Modified booth's algorithim may improve speed, but requires substantially more hardware to impliment. One must consider if die space is to be allocated to optimize multiplication. In many applications, fast multiplication is unnessesary; many early single-chip processors and microcontrollers didn't impliment multiplication, as they were intended for simple embeded applications.
+All of this needs to be considered when deciding between the two algorithms. Modified Booth's algorithm may improve speed, but requires substantially more hardware to implement. One must consider if it's worth the cost to optimize multiplication. In many applications, fast multiplication is unnecessary; many early single-chip processors and microcontrollers didn't implement multiplication, as they were intended for simple embedded applications.
\section*{Conclusion}
Hardware multipliers can help accelerate applications in which multiplication is frequent. When implementing hardware multipliers, it's important to consider the advantages and disadvantages of various multiplier schemes. Modified Booth's algorithm gives diminishing returns for smaller operands and requires significantly more logic. In applications that depend heavily on fast multiplication of large numbers, modified Booth's algorithm is optimal.
% mba generally but not always faster
@@ -154,12 +156,12 @@ Hardware multipliers can help accellerate applications in which multiplication i
\begin{figure}[h]
\centering
\input{speed_table.tex}
- \captionof{table}{Number of additions and subtractions for various inputs}
+ \captionof{table}{Number of additions and subtractions for various inputs.}
\label{speed_table}
\end{figure}
\begin{figure}[H]
\input{result_table.tex}
- \captionof{table}{Results of multiplication according to simulated multipliers}
+ \captionof{table}{Results of multiplication according to simulated multipliers.}
\label{result_table}
\end{figure}