From 877832fad4adf734e97522656d58f1d480c438ae Mon Sep 17 00:00:00 2001
From: Danila Fedorin <danila.fedorin@gmail.com>
Date: Tue, 16 Mar 2021 16:31:00 -0700
Subject: [PATCH] Add more homework solutions.

---
 HW3.tex |  32 +++++------
 HW4.tex | 107 +++++++++++++++++++++++++++++++++++
 HW5.tex | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 17 deletions(-)
 create mode 100644 HW4.tex
 create mode 100644 HW5.tex

diff --git a/HW3.tex b/HW3.tex
index b55c29d..73b1f2f 100644
--- a/HW3.tex
+++ b/HW3.tex
@@ -80,7 +80,7 @@ For all inverters, then, we get the following:
 
 \pagebreak
 \section*{Q4}
-First, to compute stage effort $\hat{f}$.
+First, to compute total effort $F$.
 
 \begin{equation*}
   \begin{aligned}
@@ -92,29 +92,27 @@ First, to compute stage effort $\hat{f}$.
   \end{aligned}
 \end{equation*}
 
-Assuimng a $p_\text{invs}$ of 1, and thus $\rho = 3.59$, we get:
+Since we are using the same inverter as the one in Q3, we are
+once again using $p_\text{inv}=5$, and thus, have that $\rho=6.138$.
+From this, we can determine the ideal number of stages:
 
 \begin{equation*}
-  \log_\rho F = 7.2 \approx 7
+  \log_\rho F = 5.13 \approx 5
 \end{equation*}
 
-Since we currently have 3 stages, we should insert 4 inverters.
-It appears as though inserting inverters only at the end makes it
-too difficult for the first-stage NAND gate to drive the 3-branched
-NOR gates (we end up with an optimal size less than 1). Instead,
-I will insert two inverters right after the NAND2 gate, and two more
-inverters at the end. We can now compute gate sizes:
+Since we currently have 3 stages, we should insert 2 inverters.
+I will insert these at the end of the path in question. From
+there, we once again compute stage effort $\hat{f}$, and work
+backwards to determine the optimal sizes for all of the stages.
 
 \begin{equation*}
   \begin{aligned}
-      \hat{f} &= \sqrt[7]{11111} \\
-      \text{sz}_7 &= 1000/\hat{f}^1 = 264 \\
-      \text{sz}_6 &= 1000/\hat{f}^2 = 69.8 \\
-      \text{sz}_5 &= 1000/\hat{f}^3 * \left(\frac{5}{3}\right) = 30.8 \\
-      \text{sz}_4 &= 1000/\hat{f}^4 * \left(\frac{5}{3}\right)\left(\frac{5}{3}\right) = 13.5 \\
-      \text{sz}_3 &= 1000/\hat{f}^5 * \left(\frac{5}{3}\right)\left(\frac{5}{3}\right)3 = 10.7  \\
-      \text{sz}_2 &= 1000/\hat{f}^6 * \left(\frac{5}{3}\right)\left(\frac{5}{3}\right)3 = 2.84  \\
-      \text{sz}_1 &= 1000/\hat{f}^7 * \left(\frac{5}{3}\right)\left(\frac{5}{3}\right)3 = 1  \\
+      \hat{f} &= \sqrt[5]{11111} \\
+      \text{sz}_\text{inv1} &= 1000/\hat{f}^1 = 155 \\
+      \text{sz}_\text{inv2} &= 1000/\hat{f}^2 = 24.1 \\
+      \text{sz}_\text{nand3} &= 1000/\hat{f}^3 \cdot \left(\frac{5}{3}\right) = 6.23 \\
+      \text{sz}_\text{nor2} &= 1000/\hat{f}^4 \cdot \left(\frac{5}{3}\right)\left(\frac{5}{3}\right) = 1.61 \\
+      \text{sz}_\text{nand2} &= 1000/\hat{f}^5 \cdot \left(\frac{5}{3}\right)\left(\frac{5}{3}\right)\left(\frac{4}{3}\right)3 = 1
   \end{aligned}
 \end{equation*}
 
diff --git a/HW4.tex b/HW4.tex
new file mode 100644
index 0000000..95394dc
--- /dev/null
+++ b/HW4.tex
@@ -0,0 +1,107 @@
+\documentclass{article}
+\usepackage[margin=1in]{geometry}
+\usepackage{graphicx}
+\usepackage{amsmath}
+\title{Homework 4}
+\begin{document}
+\maketitle
+\section*{Q1}
+\begin{itemize}
+    \item Heat is analogous to electric charge. Much like charge flows in electrical circuits,
+        from areas with higher electric potential to areas with lower electrical potential,
+        heat flows from areas of higher temperature to areas of lower temperature.
+    \item Thermal capacitance is analogous to electric capacitance. Much like materials of higher
+        thermal capacitance take more heat to increase in temperature, materials with higher
+        electrical capacitance require more charge to increase in voltage / electric potential.
+    \item As hinted at in the earlier question, temperature is analogous to voltage. Differences
+        in temperature / voltage cause the flow of heat / charge.
+    \item Just as heat is analogous to electric charge, heat flow is analogous to current.
+        Heat flow is proportional to the difference between temperature in two areas,
+        and electric current is proportional to differences in voltage / electric potential.
+\end{itemize}
+
+\subsection*{Q2}
+Radiation is the mode of heat transfer that occurs via EM waves. 
+
+\subsection*{Q3}
+Electrical current is easier to constrain, because we have much better electrical insulators
+than we do thermal insulators. It's possible to have materials with electrical insulation
+much greater than $10^8$, but thermal insulation hovers in the thousands-tens of thousands range.
+
+\section*{Q4}
+We can reduce dynamic power by gating the clock signal. If we know that a circuit won't be in use for
+some time, we can use an \textsc{And} gate to prevent the clock signal from propagating into the
+unused part of the network. This is crucial, because the clock signal has an activity factor of $\alpha=1$,
+which means that components connected to the clock cost a lot of power. Timed components for which
+the clock signal has been turned off no longer switch at all, and thus do not draw or dump any power.
+
+Supply voltage can be lowered to significantly reduce power consumption. Since power is related
+quadratically to voltage, halving the supply voltage can reduce power consumption by a factor of four.
+By receiving external information about the load placed on the circuit, we can dynamically reduce the power
+(and slow down the circuit) at times when it's not in heavy use.
+
+\pagebreak
+\section*{Q5}
+\subsection*{Minimal delay}
+We are ignoring intrinsic delay. Using the book's log equation, we find:
+
+\begin{equation*}
+    \begin{aligned}
+    & \rho(1-\ln \rho) = 0 \\
+        \Rightarrow \quad & e = \rho \\
+    \end{aligned}
+\end{equation*}
+
+Since we have $B=1$ and $H=1$, and our logical effort is $G = 10,000$, we have $F = BGH = 10,000$.
+The ideal number of stages is:
+
+\begin{equation*}
+    \log_\rho F = \ln F \approx 9
+\end{equation*}
+
+Then, with each of the 9 stages contributing $10,000^{1/9}$ units of delay,
+the total delay becomes:
+
+\begin{equation*}
+    9 \times 10,000^{1/9} \approx 25
+\end{equation*}
+
+\subsection*{Reducing Power Using Supply}
+To reduce the power consumption by altering supply, we need to reduce supply by a factor
+of 2, since the two are quadratically related.
+
+\subsection*{Reducing Power by Changing the Number of Stages}
+The total amount of capacitance is proportional to the sum of the sizes
+of the inverters in the circuit. This, in turn, is given by the geometric sequence:
+
+\begin{equation*}
+    1 + \hat{f} + \hat{f}^2 + \dots + \hat{f}^{N-1} = \frac{\hat{f}^N-1}{\hat{f}-1}
+\end{equation*}
+
+Since $\hat{f} = \sqrt[N]{F}$, this simplifies to:
+
+\begin{equation*}
+    \frac{F-1}{\sqrt[N]{F}-1}
+\end{equation*}
+
+To determine the number of stages for $\frac{1}{4}$ power, we just solve:
+
+\begin{equation*}
+    \begin{aligned}
+        \frac{F-1}{\sqrt[n]{F}-1} & < \frac{1}{4} \frac{F-1}{\sqrt[9]{F}-1} \\
+        4(\sqrt[9]{F}-1) & < \sqrt[n]{F}-1 \\
+    \end{aligned}
+\end{equation*}
+
+Solving using Wolfram Alpha, we get $n \leq 4$. This brings us to a total delay
+of $4 \times 10,000^{(1/4)} = 40$.
+
+\pagebreak
+\section*{Q6}
+\begin{figure}[h]
+    \centering
+    \includegraphics[width=0.7\linewidth]{nand.png}
+    \caption{Step-up 2-input NAND gate.}
+\end{figure}
+
+\end{document}
diff --git a/HW5.tex b/HW5.tex
new file mode 100644
index 0000000..c96432d
--- /dev/null
+++ b/HW5.tex
@@ -0,0 +1,173 @@
+\documentclass{article}
+\usepackage[margin=1in]{geometry}
+\usepackage{graphicx}
+\usepackage{amsmath}
+\title{Homework 5}
+\begin{document}
+\maketitle
+\section*{Q1}
+\subsection*{Part a}
+In lecture, Scott said that he specifically wants the top path optimized. In this
+case, we insert a unit inverter at the fork in front of the bottom path.
+
+The minimal delay depends on the intrinsic and incremental delay of
+each gate. We thus assume the conditions of part c, that is,
+that the intrinsic and incremental delay are both 30ps.
+
+Solving for the ideal number of stages involves computing $\rho$. This, in turn, is
+done by solving the equation: $p_\text{inv} + \rho(1-\ln\rho) = 0$. Knowing that
+the intrinsic and incremental delays are the same, we set $p_\text{inv}=1$, and
+thus obtain $\rho=3.59$. To find the ideal number of stages, we now need
+to compute $\log_\rho$ of our path effort $F$.
+
+To compute $F$, we must first determine the logical effort of each of the gates
+used in the circuit. With a $\beta$ of 1.2, we get $g=\frac{3.2}{2.2}$ for the
+NAND gate, and $\frac{3.4}{2.2}$ for the NOR gate. Then, we have:
+
+\begin{equation*}
+    \begin{aligned}
+        G &= \left(\frac{3.2}{2.2}\right)\left(\frac{3.4}{2.2}\right) \\
+        H &= \frac{1000}{2} = 500 \quad \text{(for the top path)}
+    \end{aligned}
+\end{equation*}
+
+In this case, we have a branching effort of
+
+\begin{equation*}
+    2\left(\frac{1+s_{3,t}}{s_{3,t}}\right) \approx 2\times 1 = 2
+\end{equation*}
+
+We thus compute the following total effort:
+
+\begin{equation*}
+    \begin{aligned}
+        F &= \left(\frac{3.2}{2.2}\right)\left(\frac{3.4}{2.2}\right)\left(\frac{1000}{2}\right)2 \approx 2247 \\
+        \hat{N} &= \log_\rho F \approx 6
+    \end{aligned}
+\end{equation*}
+
+Here, assume the polarity of the signal doesn't matter, so that we really can insert an odd number
+of inverters. While adding inverters, we observe that we can transform the NOR gate into a NAND
+gate, thereby reducing the path effort slightly. Thus, we place one inverter right after the fork
+on the top path, and two inverters on the other side of the formerly-NOR-now-NAND gate. Recomputing
+total effort $F$ and stage effort $\hat{f}$:
+
+\begin{equation*}
+    \begin{aligned}
+        F &= \left(\frac{3.2}{2.2}\right)\left(\frac{3.2}{2.2}\right)\left(\frac{1000}{2}\right)2 \approx 2116 \\
+        \hat{f} &\approx 3.58
+    \end{aligned}
+\end{equation*}
+
+The parasitic delays of the two gates (relative to the inverter) are, even with $\beta=1.2$, still
+equal to 2. Thus, we have a total delay of:
+
+\begin{equation*}
+    6\times 30 \times\hat{f} + 8 \times 30 \approx 885\textit{ps}
+\end{equation*}
+
+\pagebreak
+We can also compute gate sizes.
+
+\begin{equation*}
+  \begin{aligned}
+      s_{6,t} &= 1000/\hat{f} \approx 279 \\
+      s_{5,t} &= 1000/\hat{f}^2 \approx 77.9 \\
+      s_{4,t} &= \left(\frac{3.2}{2.2}\right)1000/\hat{f}^3 \approx 31.6 \\
+      s_{3,t} &= \left(\frac{3.2}{2.2}\right)1000/\hat{f}^4 \approx 8.83 \\
+      s_{2} &= \left(\frac{3.2}{2.2}\right)^2 1000/\hat{f}^5 \approx 3.58 \\
+      s_{1} &= 2\left(\frac{3.2}{2.2}\right)^2 1000/\hat{f}^6 \approx 2
+      % (3.4/2.2)(3.2/2.2)*1.5
+  \end{aligned}
+\end{equation*}
+
+\subsection*{Part b}
+
+The branching effort $B$ does not follow immediately from the circuit. However,
+given the constraint that the two delays must be the same, we find that
+the size of the top NOR gate must be twice the size of the bottom NOR
+gate.
+This, in turn, leads to a branching effort (again, for the top path)
+of $1.5$. Thus, we get:
+
+\begin{equation*}
+    \begin{aligned}
+        B &= 2\left(\frac{3}{2}\right) \\
+        F &= \left(\frac{3.2}{2.2}\right)\left(\frac{3.2}{2.2}\right)\left(\frac{1000}{2}\right)2\left(\frac{3}{2}\right) \approx 3174 \\
+        \hat{N} &= \log_\rho F \approx 6
+    \end{aligned}
+\end{equation*}
+
+Here, assume the polarity of the signal doesn't matter, so that we really can insert an odd number
+of inverters at the end of each branch. The sizes are thus:
+
+\begin{equation*}
+  \begin{aligned}
+      s_{6,t} &= 1000/\hat{f} \approx 261 \\
+      s_{6,b} &= 500/\hat{f} \approx 131 \\
+      s_{5,t} &= 1000/\hat{f}^2 \approx 68.0 \\
+      s_{5,b} &= 500/\hat{f}^2 \approx 34.0 \\
+      s_{4,t} &= \left(\frac{3.2}{2.2}\right)1000/\hat{f}^3 \approx 25.8 \\
+      s_{4,b} &= \left(\frac{3.2}{2.2}\right)500/\hat{f}^3 \approx 12.9 \\
+      s_{3,t} &= \left(\frac{3.2}{2.2}\right)1000/\hat{f}^4 \approx 6.73 \\
+      s_{3,b} &= \left(\frac{3.2}{2.2}\right)500/\hat{f}^4 \approx 3.37 \\
+      s_{2} &= \left(\frac{3.2}{2.2}\right)^2 1000/\hat{f}^5 \approx 3.83 \\
+      s_{1} &= 2\left(\frac{3.2}{2.2}\right)^2 1000/\hat{f}^6 \approx 2
+      % (3.4/2.2)(3.2/2.2)*1.5
+  \end{aligned}
+\end{equation*}
+
+The calculation for total delay is the same, except that a different value of $\hat{f}$ is used.
+
+\begin{equation*}
+    6\times 30 \times\hat{f} + 8 \times 30 \approx 930\textit{ps}
+\end{equation*}
+
+\pagebreak
+\subsection*{Part c}
+See parts a and b for the respective delay calculations.
+
+\subsection*{Part d}
+The ratios of the PMOS and NMOS transistors are given by $\frac{\beta}{\beta+2}$ and $\frac{2}{\beta+2}$. For part a, since the size of the gate itself is 3.58, the sizes of the PMOS and NMOS are, respectively, 1.345 and 2.24. For part b, when the gate has a size of 3.83, the transistors themselves come in at 1.44 and 2.39.
+
+\section*{Q2}
+As far as I know, there's no way to guarantee the resolution of a metastable state in any amount of time. Metastability is resolved through random perturbations (however small), and thus there's technically a possibility that it doesn't resolve one way or the other.
+
+\section*{Q3}
+With the ideal number of stages being 5, we can employ a trick. We need to insert two inverters;
+if we do this right after the NOR gate, we can ``push'' one of the bubbles, turning the structure
+into an inverter, followed by a NAND gate, followed by another inverter. The rest of the circuit
+remains the same: a 4-input NAND gate followed by an inverter. This way, we have a logical effort of:
+
+\begin{equation*}
+    \begin{aligned}
+        G &= \left(\frac{4}{3}\right)\times 2 = \frac{8}{3} \\
+        H &= c \\
+        B &= 1
+    \end{aligned}
+\end{equation*}
+Then, the total delay is:
+\begin{equation*}
+    \begin{aligned}
+        100\times5\times\left[\left(\frac{8}{3}\right)c\right]^{\frac{1}{5}} + 50\times (4+2+1+1+1) &= 2400 \\
+        \Rightarrow c &= 338
+    \end{aligned}
+\end{equation*}
+
+\section*{Q4}
+A wire without repeaters has its delay increase quadratically with length. This is because both the
+resistance and capacitance are proportional to length. On the other hand, wire delay increases
+only linearly with repeaters. In particular, the following equation is given in the book:
+\begin{equation*}
+    t_{pd} = 1.81 \sqrt{\text{FO4} R_wC_w}l
+\end{equation*}
+where $l$ is the wire length.
+
+\section*{Q5}
+When nearby wires do not have the same signal as the wire itself, it effectively experiences
+additional capacitance from the coupling. Thus, it becomes harder to drive the wire (since
+more charge is effectively needed to change the voltage); this, in turn, increases delay.
+In fact, it almost behaves as though the wire is connected to the nearby wires via a very
+capacitive material; in that sense, it's \emph{almost} as if the branching effort is higher.
+
+\end{document}