Compare commits
36 Commits
cf73ec9016
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| f317a7e8da | |||
| 90a5aed6ec | |||
| c6d7795074 | |||
| d8f4a272e3 | |||
| ff0edb93bb | |||
| 39ec744562 | |||
| 6530e7ef8c | |||
| 71195df7c9 | |||
| 0c1d8611b1 | |||
| b99403a4ff | |||
| 9afa839bff | |||
| 6f99879b8f | |||
| fe52f689f9 | |||
| c171b0374b | |||
| eb8d068519 | |||
| 6db76f4fd3 | |||
| 64ee80be63 | |||
| 6b963c967b | |||
| 4d4ceddcc6 | |||
| 75381749d7 | |||
| d2f53a9a4f | |||
| 0f6426958e | |||
| 8b9eabdec1 | |||
| e077bb9071 | |||
| 76899cb8a3 | |||
| 12856ef152 | |||
| ca72f3eb3d | |||
| e5b0166d8c | |||
| 8ba9d02a8e | |||
| c866f63e8c | |||
| f3ffb39219 | |||
| f289e84389 | |||
| 5f8a49ab9a | |||
| 2010fcdf52 | |||
| 8285087e3f | |||
| b58f4df33e |
1602
final/SRAM.jelib
Normal file
290
final/SRAM_bits.cir
Normal file
@@ -0,0 +1,290 @@
|
|||||||
|
* This file contains all the subcircuits to be used in SRAM256.cir
|
||||||
|
|
||||||
|
***** long channel VTP = -0.9, VTN = 0.8 *****
|
||||||
|
*.include modelcard/1um.pm
|
||||||
|
*.param supply = 5
|
||||||
|
*.param ll = 1u
|
||||||
|
|
||||||
|
****** 50nm models***
|
||||||
|
|
||||||
|
|
||||||
|
.include ./modelcard/50nm.pm
|
||||||
|
.param supply =1
|
||||||
|
|
||||||
|
.param lambda=25nm
|
||||||
|
.param ll='2*lambda'
|
||||||
|
|
||||||
|
****** 16nm low power models***
|
||||||
|
*.include ./modelcard/PTM_LP/16nm.pm
|
||||||
|
*.param supply =0.9
|
||||||
|
*.param ll=16nm
|
||||||
|
|
||||||
|
****** 16nm high performance models***
|
||||||
|
*.include ./modelcard/PTM_HP/16nm.pm
|
||||||
|
*.param supply =0.7
|
||||||
|
*.param ll=16nm
|
||||||
|
|
||||||
|
|
||||||
|
* wire: lumped RC segment between iot and iof.
*   len, wid : scale the segment; R goes as len/wid, C goes as len*wid
*   rr, cc   : local coefficients used by the two expressions below
.subckt wire iot iof len=10 wid=10
.param rr=0.4
.param cc = '100e-15'
* series resistance from iot to iof
rt iot iof 'rr*len*50/(wid)'
* the whole capacitance is lumped on the iof end, to node 0
cf iof 0 'cc*len*wid*50/1e6'
.ends
|
||||||
|
|
||||||
|
* wire_dual: two independent wire segments with the same len/wid:
*   Xt runs lt -> rt, Xf runs lf -> rf.
.subckt wire_dual lt rt lf rf len=10 wid=10
Xt  lt rt  wire  len='len' wid='wid'
Xf  lf rf  wire  len='len' wid='wid'
.ends
|
||||||
|
|
||||||
|
* wire_precharge: same two wire segments as wire_dual, plus one pp device
* from vdd onto the right-hand end of each segment, gated by clk.
*   ww : base width of the pp devices (instantiated at ww*2)
* NOTE(review): vdd is not a port of this subcircuit; presumably it is made
* global by the including deck -- confirm.
.subckt wire_precharge lt rt lf rf clk len=10 wid=10 ww=10
Xt   lt rt  wire  len='len' wid='wid'
Xf   lf rf  wire  len='len' wid='wid'
* pull-ups on rt / rf (pp pins are d g s)
Xpt  rt clk vdd  pp  ww='ww*2'
Xpf  rf clk vdd  pp  ww='ww*2'
.ends
|
||||||
|
|
||||||
|
* nn: single nmos device wrapper, pins d g s.
*   Body is tied to node 0. Length is the deck-level parameter ll and the
*   width is ww*ll, so ww is the width expressed in multiples of ll.
.subckt nn d g s ww=100
mnfet  d g s 0  nmos  L=ll w='ww*ll'
.ends
|
||||||
|
|
||||||
|
* pp: single pmos device wrapper, pins d g s.
*   Body is tied to vdd. Length is the deck-level parameter ll and the
*   width is ww*ll, so ww is the width expressed in multiples of ll.
.subckt pp d g s ww=100
mpfet  d g s vdd  pmos  L=ll w='ww*ll'
.ends
|
||||||
|
|
||||||
|
|
||||||
|
* inv: inverter, out driven from inn.
*   size : total device width, split between pull-up and pull-down
*   beta : pull-up to pull-down width ratio (pp gets beta/(beta+1) of size,
*          nn gets 1/(beta+1) of size)
.subckt inv out inn size=30 beta=2
XPP  out inn vdd  pp  ww='size*beta/(beta+1)'
XNN  out inn gnd  nn  ww='size/(beta+1)'
.ends
|
||||||
|
|
||||||
|
* nnd2: two-input NAND, pins out in1 in0.
*   Two parallel pp pull-ups on out; two nn devices in series through the
*   internal node nng, with the in1 device next to node 0.
*   Widths: each pp is beta*size/(beta+2), each nn is 2*size/(beta+2).
.subckt nnd2 out in1 in0 size=30 beta=2
* pull-up network (parallel)
Xap0  out in0 vdd  pp  ww='beta*size/(beta+2)'
Xap1  out in1 vdd  pp  ww='beta*size/(beta+2)'
* pull-down network (series stack)
Xan0  out in0 nng  nn  ww='2*size/(beta+2)'
Xan1  nng in1 0    nn  ww='2*size/(beta+2)'
.ends nnd2
|
||||||
|
|
||||||
|
* nor2: two-input NOR, pins out in1 in0.
*   Two pp devices in series through the internal node ppi (the in0 device
*   sits next to vdd); two parallel nn pull-downs on out.
*   Widths: each pp is 2*beta*size/(2*beta+1), each nn is size/(2*beta+1).
.subckt nor2 out in1 in0 size=30 beta=2
* pull-up network (series stack)
Xap0  ppi in0 vdd  pp  ww='2*beta*size/(2*beta+1)'
Xap1  out in1 ppi  pp  ww='2*beta*size/(2*beta+1)'
* pull-down network (parallel)
Xan0  out in0 0    nn  ww='1*size/(2*beta+1)'
Xan1  out in1 0    nn  ww='1*size/(2*beta+1)'
.ends nor2
|
||||||
|
|
||||||
|
* latch: pass-gate latch, pins out inn clk clb.
*   inn reaches the storage node qin through an nn gated by clk in parallel
*   with a pp gated by clb. qin is inverted onto ggg (Xi), ggg is inverted
*   onto out (Xo, three times the size of Xi), and a fixed-size inverter
*   (Xfp/Xfn) drives ggg back onto qin.
*   size : scales Xi and Xo only.
*   beta : declared but not referenced anywhere in this body.
.subckt latch out inn clk clb size=15 beta=2
* input pass gate
Xn   inn clk qin  nn  ww='5'
Xp   inn clb qin  pp  ww='10'

* feedback inverter ggg -> qin
Xfp  qin ggg vdd  pp  ww='5'
Xfn  qin ggg gnd  nn  ww='5'

* forward inverters qin -> ggg -> out
Xi   ggg qin  inv  size='size'
Xo   out ggg  inv  size='3*size'
.ends latch
|
||||||
|
|
||||||
|
* flop: two latches in series on opposite clock phases, pins qqq ddd clk.
*   Xinve produces clb as the inverse of clk. The first latch (Xflip) gets
*   clb on its clk pin and clk on its clb pin; the second (Xflop) gets them
*   the right way round. int is the node between the two latches.
.subckt flop qqq ddd clk
Xinve  clb clk           inv
Xflip  int ddd clb clk   latch
Xflop  qqq int clk clb   latch
.ends flop
|
||||||
|
|
||||||
|
* reg8: eight flop instances sharing one clk; bit k maps ink -> otk.
.subckt reg8 ot7 ot6 ot5 ot4 ot3 ot2 ot1 ot0 in7 in6 in5 in4 in3 in2 in1 in0 clk
x7  ot7 in7 clk  flop
x6  ot6 in6 clk  flop
x5  ot5 in5 clk  flop
x4  ot4 in4 clk  flop
x3  ot3 in3 clk  flop
x2  ot2 in2 clk  flop
x1  ot1 in1 clk  flop
x0  ot0 in0 clk  flop
.ends reg8
|
||||||
|
|
||||||
|
* dat1: single pulse source followed by one inverter.
*   The source on j0 starts at 'supply' and pulses to 0 after 'start', with
*   10p edges, a pulse width of duty*period-10ps and a repeat period of
*   total*period. out is the inverter output, so it is the inverse of j0.
*   sz : size passed to the output inverter.
.subckt dat1 out period=1ns start=1ns sz=50 total=5 duty=3
V0  j0 0  PULSE('supply' 0 'start' 10p 10p 'duty*period-10ps' 'total*period')
x7  out j0  inv  size='sz'
.ends dat1
|
||||||
|
|
||||||
|
* dat8: eight pulse sources with different widths/periods, one per channel,
* each passed through buf8 (an inverter per channel) to o7..o0.
*   V0..V3 start at 0 and pulse to 'supply'; repeat periods 1x..4x per.
*   V4..V7 start at 'supply' and pulse to 0; repeat periods 1x..4x per.
*   size : forwarded to buf8 as sz.
.subckt dat8 o7 o6 o5 o4 o3 o2 o1 o0 per=1ns start=1ns size=50
V0  j0 0  PULSE(0 'supply' 'start' 10p 10p '0.5*per-10ps' 'per')
V1  j1 0  PULSE(0 'supply' 'start' 10p 10p '0.5*per-10ps' '2*per')
V2  j2 0  PULSE(0 'supply' 'start' 10p 10p '0.5*per-10ps' '3*per')
V3  j3 0  PULSE(0 'supply' 'start' 10p 10p '0.5*per-10ps' '4*per')
V4  j4 0  PULSE('supply' 0 'start' 10p 10p '0.5*per-10ps' '1*per')
V5  j5 0  PULSE('supply' 0 'start' 10p 10p '1*per-10ps' '2*per')
V6  j6 0  PULSE('supply' 0 'start' 10p 10p '1.5*per-10ps' '3*per')
V7  j7 0  PULSE('supply' 0 'start' 10p 10p '2*per-10ps' '4*per')
xb  o7 o6 o5 o4 o3 o2 o1 o0 j7 j6 j5 j4 j3 j2 j1 j0  buf8  sz='size'
.ends dat8
|
||||||
|
|
||||||
|
* buf8: one inv per channel, ink -> otk, all sized sz.
*   Each channel is a single inverter, so otk is the inverse of ink.
.subckt buf8 ot7 ot6 ot5 ot4 ot3 ot2 ot1 ot0 in7 in6 in5 in4 in3 in2 in1 in0 sz=100
x7  ot7 in7  inv  size='sz'
x6  ot6 in6  inv  size='sz'
x5  ot5 in5  inv  size='sz'
x4  ot4 in4  inv  size='sz'
x3  ot3 in3  inv  size='sz'
x2  ot2 in2  inv  size='sz'
x1  ot1 in1  inv  size='sz'
x0  ot0 in0  inv  size='sz'
.ends buf8
|
||||||
|
|
||||||
|
|
||||||
|
* nnd3: three-input NAND, pins out in2 in1 in0.
*   Three parallel pp pull-ups on out; three nn devices in series through
*   nn0 and nn1, with the in2 device next to gnd.
*   Widths: each pp is beta*size/(beta+3), each nn is 3*size/(beta+3).
.subckt nnd3 out in2 in1 in0 size=20 beta=2
* pull-up network (parallel)
Xp0  out in0 vdd  pp  ww='beta*size/(beta+3)'
Xp1  out in1 vdd  pp  ww='beta*size/(beta+3)'
Xp2  out in2 vdd  pp  ww='beta*size/(beta+3)'
* pull-down network (series stack)
Xn0  out in0 nn0  nn  ww='3*size/(beta+3)'
Xn1  nn0 in1 nn1  nn  ww='3*size/(beta+3)'
Xn2  nn1 in2 gnd  nn  ww='3*size/(beta+3)'
.ends
|
||||||
|
|
||||||
|
* senseAmp: two cross-coupled nnd3 gates, pins ot1 ot0 in1 in0 eva.
*   Xn0 drives ot0 from (in0, ot1, eva); Xn1 drives ot1 from (in1, ot0, eva).
*   size : forwarded to both nnd3 gates.
.subckt senseAmp ot1 ot0 in1 in0 eva size=40
Xn0  ot0 in0 ot1 eva  nnd3  size='size'
Xn1  ot1 in1 ot0 eva  nnd3  size='size'
.ends senseAmp
|
||||||
|
|
||||||
|
* iSenseAmp: latch-style sense amplifier, pins ot1 ot0 in1 in0 eva.
*   All nine devices use ww='size'.
.subckt iSenseAmp ot1 ot0 in1 in0 eva size=40
* pp pull-ups on each output: one gated by eva, one by the opposite output
Xp1  ot1 eva vdd  pp  ww='size'
Xp2  ot1 ot0 vdd  pp  ww='size'
Xp3  ot0 eva vdd  pp  ww='size'
Xp4  ot0 ot1 vdd  pp  ww='size'
* cross-coupled nn pair, each output gating the other side
Xn1  ot1 ot0 nn1  nn  ww='size'
Xn2  ot0 ot1 nn0  nn  ww='size'
* input devices under the pair, gated by in1 / in0, joined at pd
Xn3  nn1 in1 pd   nn  ww='size'
Xn4  nn0 in0 pd   nn  ww='size'
* shared device from pd to gnd, gated by eva
Xn5  pd  eva gnd  nn  ww='size'
.ends
|
||||||
|
|
||||||
|
* precharge: gate logic, pins charge rwtb clk diib.
*   rdi     = NAND(rwtb, diib)
*   chargeb = NAND(clk, rdi)
*   charge  = NOT chargeb, i.e. clk AND rdi
.subckt precharge charge rwtb clk diib
Xrdi  rdi     rwtb diib  nnd2
Xnn   chargeb clk  rdi   nnd2
Xout  charge  chargeb    inv
.ends precharge
|
||||||
|
|
||||||
|
* write1: bitline write driver, pins btt bff dii rwt clk.
*   dorw = NOR(clkb, rwt), so it is high only while clk is high and rwt is low.
*   Each bitline has a two-device nn stack to gnd: the lower device is gated
*   by dii (btt side) or diib (bff side), the upper device by dorw.
*   Each bitline also has a pp pull-up to vdd gated directly by clk.
* NOTE(review): the precharge outputs pcet / pcef are not connected to
* anything else in this subcircuit -- confirm that is intended.
.subckt write1 btt bff dii rwt clk
* TODO: sizes
* complements of the three inputs
Xclk  clkb clk  inv  size='25'
Xdii  diib dii  inv  size='25'
Xrwt  rwtb rwt  inv  size='25'
* write enable
Xrwn  dorw clkb rwt  nor2  size='50'
* pull-down stacks (data device next to gnd)
Xdt   pdt dii  gnd  nn  ww='100'
Xdf   pdf diib gnd  nn  ww='100'
Xwt   btt dorw pdt  nn  ww='100'
Xwf   bff dorw pdf  nn  ww='100'
* precharge control logic
Xpcet pcet rwtb clk diib  precharge
Xpcef pcef rwtb clk dii   precharge
* bitline pull-ups
Xpct  btt clk vdd  pp  ww='100'
Xpcf  bff clk vdd  pp  ww='100'
.ends write1
|
||||||
|
|
||||||
|
* iWrite1: bitline write driver, pins btt bff dii rwt en clk.
*   Same topology as write1 with larger devices, except that the bitline
*   pull-ups are gated by the precharge outputs pcet / pcef instead of clk.
*   dorw = NOR(clkb, rwt), so it is high only while clk is high and rwt is low.
* NOTE(review): the en port is declared but not referenced in this body.
.subckt iWrite1 btt bff dii rwt en clk
* TODO: sizes
* complements of the three inputs
Xclk  clkb clk  inv  size='40'
Xdii  diib dii  inv  size='40'
Xrwt  rwtb rwt  inv  size='40'
* write enable
Xrwn  dorw clkb rwt  nor2  size='110'
* pull-down stacks (data device next to gnd)
Xdt   pdt dii  gnd  nn  ww='200'
Xdf   pdf diib gnd  nn  ww='200'
Xwt   btt dorw pdt  nn  ww='200'
Xwf   bff dorw pdf  nn  ww='200'
* precharge control logic
Xpcet pcet rwtb clk diib  precharge
Xpcef pcef rwtb clk dii   precharge
* bitline pull-ups, gated by the precharge outputs
Xpct  btt pcet vdd  pp  ww='100'
Xpcf  bff pcef vdd  pp  ww='100'
* The terminator previously named write1 (copy-paste from the block above);
* it must name the subcircuit actually being closed.
.ends iWrite1
|
||||||
|
|
||||||
|
* read1: single-column read path, pins btt bff dot rwt clk.
*   trigger  = NAND(rwt, clk); triggerb is its inverse and drives the eva
*   pin of the senseAmp. The amp outputs are set (ot1) and reset (ot0).
*   set is buffered through two inverters to set2 and gates a pp from vdd
*   onto dot; reset is inverted to reset1 and gates an nn from dot to gnd.
*   Xh1 / Xh2 are cross-coupled inverters between dot and nn1.
.subckt read1 btt bff dot rwt clk
* trigger generation
Xnd    trigger  rwt clk  nnd2
Xinv   triggerb trigger  inv
* sense amplifier on the bitlines
Xamp   set reset btt bff triggerb  senseAmp  size='40'
* output conditioning
Xinv1  set1   set    inv
Xinv2  set2   set1   inv
Xinv3  reset1 reset  inv
* Old setup:
* Xp nn1 set2 vdd pp
* Xn nn1 reset1 gnd nn
* Xh1 dot nn1 inv
* Xh2 nn1 dot inv
* current setup: the pull-up / pull-down act on dot directly
Xp     dot set2   vdd  pp
Xn     dot reset1 gnd  nn
Xh1    dot nn1  inv
Xh2    nn1 dot  inv
.ends read1
|
||||||
|
|
||||||
|
* readSub: per-column read front end, pins btt bff set rst rwt clk en.
*   trigger  = NAND(rwt, en, clk); triggerb is its inverse (inv, size 40)
*   and drives the eva pin of a senseAmp (size 200) whose outputs are
*   exported as set (ot1) and rst (ot0).
.subckt readSub btt bff set rst rwt clk en
Xnd   trigger  rwt en clk  nnd3
Xinv  triggerb trigger     inv  size='40'
Xamp  set rst btt bff triggerb  senseAmp  size='200'
* The terminator previously named read1 (copy-paste); it must name the
* subcircuit actually being closed.
.ends readSub
|
||||||
|
|
||||||
|
* iReadSub: per-column read front end, pins btt bff set rst rwt clk en.
*   Same as readSub but uses iSenseAmp (size 40) in place of senseAmp.
*   trigger = NAND(rwt, en, clk); triggerb is its inverse and drives the
*   eva pin of the amplifier, whose outputs are exported as set and rst.
.subckt iReadSub btt bff set rst rwt clk en
Xnd   trigger  rwt en clk  nnd3
Xinv  triggerb trigger     inv  size='40'
Xamp  set rst btt bff triggerb  iSenseAmp  size='40'
* The terminator previously named read1 (copy-paste); it must name the
* subcircuit actually being closed.
.ends iReadSub
|
||||||
|
|
||||||
|
* readcollect: merges four set/rst pairs into one output dot.
*   set01 = NAND(set0, set1), set23 = NAND(set2, set3); each is inverted
*   (nset01, nset23) and gates a pp from vdd onto nn1.
*   rst01 = NAND(rst0, rst1), rst23 = NAND(rst2, rst3); each gates an nn
*   from nn1 to gnd.
*   Xh1 / Xh2 are cross-coupled size-60 inverters between nn1 and dot.
.subckt readcollect dot set0 rst0 set1 rst1 set2 rst2 set3 rst3
* pairwise combination of the column outputs
Xset01   set01 set0 set1  nnd2
Xset23   set23 set2 set3  nnd2
Xrst01   rst01 rst0 rst1  nnd2
Xrst23   rst23 rst2 rst3  nnd2
Xnset01  nset01 set01  inv
Xnset23  nset23 set23  inv
* pull-ups / pull-downs on the shared node nn1
Xp01     nn1 nset01 vdd  pp
Xp23     nn1 nset23 vdd  pp
Xn01     nn1 rst01  gnd  nn
Xn23     nn1 rst23  gnd  nn
* cross-coupled output inverters
Xh1      dot nn1  inv  size='60'
Xh2      nn1 dot  inv  size='60'
.ends readCollect
|
||||||
|
|
||||||
|
|
||||||
|
* decode2: placeholder -- ports are declared but the body has no elements.
.subckt decode2 o11 o10 o01 o00 di1 di0 df1 df0

.ends
|
||||||
|
|
||||||
|
|
||||||
|
* decode_nor16: placeholder -- no ports and no elements are defined yet.
.subckt decode_nor16

.ends
|
||||||
|
|
||||||
|
* decode_nnd16: placeholder -- no ports and no elements are defined yet.
.subckt decode_nnd16

.ends
|
||||||
|
|
||||||
|
|
||||||
|
* decode_16and1: placeholder -- no ports and no elements are defined yet.
.subckt decode_16and1

.ends decode_16and1
|
||||||
|
|
||||||
|
|
||||||
|
* dmux256: placeholder -- ports are declared but the body has no elements.
*   Port names were normalised to the oNNN / dtN pattern used by the other
*   ports (was: 0012 and d4). Ports bind by position, so this does not
*   affect any instantiation.
* NOTE(review): the input list has no dt2 (dt7 dt6 dt5 dt4 dt3 dt1 dt0), and
* only four outputs are listed. Adding ports would change the interface,
* so that is left for whoever fills in the body.
.subckt dmux256 o255 o223 o012 o001 dt7 dt6 dt5 dt4 dt3 dt1 dt0

.ends dmux256
|
||||||
|
|
||||||
|
|
||||||
|
* decModel: one path through a gate chain from din to choose, gated by clk
* at the end. Chain: nnd2 -> nor2 -> nnd2 -> inv -> nnd2(with clk) -> inv.
*   The "r" gates (Xnar, Xnrr, Xna2r) form the path; their second input is
*   tied to vdd (nnd2) or gnd (nor2).
*   The "l" gates (Xnal, Xnrl, Xna2l) hang off the same driving node with
*   larger sizes (4x, 3x, 15x); their outputs ww1..ww3 connect to nothing
*   else in this subcircuit.
*   size : base size for every gate in the chain.
.subckt decModel choose din clk size='20'
Xi1    nn1 din  inv  size='size'
* Here: stopped using i1 and just used din
* first stage, driven by din
Xnal   ww1 gnd din  nnd2  size='size*4'
Xnar   nn2 vdd din  nnd2  size='size'
* second stage, driven by nn2
Xnrl   ww2 nn2 vdd  nor2  size='size*3'
Xnrr   nn3 nn2 gnd  nor2  size='size'
* third stage, driven by nn3
Xna2l  ww3 gnd nn3  nnd2  size='size*15'
Xna2r  nn4 vdd nn3  nnd2  size='size'
* output stage: invert, combine with clk, invert again
Xi2    nn5 nn4      inv   size='size'
Xnac   nn6 nn5 clk  nnd2  size='size'
Xi3    choose nn6   inv   size='size'
.ends
|
||||||
|
|
||||||
|
* mem1: six-device memory cell, pins bt bf ope.
*   Two cross-coupled inverters hold the internal nodes tt and ff:
*   Xpt/Xnt drive tt from ff, Xpf/Xnf drive ff from tt.
*   Two nn access devices gated by ope connect bt to tt and bf to ff.
*   Every device uses ww='5'.
.subckt mem1 bt bf ope
* inverter driving tt
Xpt  tt ff vdd  pp  ww='5'
Xnt  tt ff gnd  nn  ww='5'
* inverter driving ff
Xpf  ff tt vdd  pp  ww='5'
Xnf  ff tt gnd  nn  ww='5'
* access devices
Xat  bt ope tt  nn  ww='5'
Xaf  bf ope ff  nn  ww='5'
.ends
|
||||||
|
|
||||||
BIN
final/amp.png
Normal file
|
After Width: | Height: | Size: 53 KiB |
BIN
final/decoder.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
BIN
final/layout_arrayed.png
Normal file
|
After Width: | Height: | Size: 394 KiB |
BIN
final/layout_arrayed_closeup.png
Normal file
|
After Width: | Height: | Size: 100 KiB |
BIN
final/layout_single.png
Normal file
|
After Width: | Height: | Size: 210 KiB |
BIN
final/read_select.png
Normal file
|
After Width: | Height: | Size: 24 KiB |
387
final/report.tex
Normal file
@@ -0,0 +1,387 @@
|
|||||||
|
\documentclass{article}
|
||||||
|
\usepackage[margin=1in]{geometry}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\usepackage{hyperref}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\usepackage{caption}
|
||||||
|
\usepackage{subcaption}
|
||||||
|
\definecolor{link}{HTML}{006275}
|
||||||
|
\hypersetup{
|
||||||
|
colorlinks,
|
||||||
|
citecolor=black,
|
||||||
|
filecolor=black,
|
||||||
|
linkcolor=link,
|
||||||
|
urlcolor=black
|
||||||
|
}
|
||||||
|
\title{Final Project Report}
|
||||||
|
\author{Danila Fedorin}
|
||||||
|
\begin{document}
|
||||||
|
\maketitle
|
||||||
|
\tableofcontents
|
||||||
|
\pagebreak
|
||||||
|
\section{General Design and Considerations}
|
||||||
|
The goal of this assignment was to create a 256-byte SRAM memory unit. In order
|
||||||
|
to minimize wire delays, I chose to split each bit into \textbf{4 columns of 64 SRAM cells
|
||||||
|
each}. This was motivated by the following factors:
|
||||||
|
\begin{itemize}
|
||||||
|
\item \emph{Larger} columns were eliminated due to the high cost of interconnect.
|
||||||
|
Even large write blocks were not able to charge the ``far ends'' of the wire
|
||||||
|
at shorter clock cycles. Increasing wire width did not help; although resistance
|
||||||
|
decreased, the capacitance increased, leading to small net gains. Thus, I made
|
||||||
|
the decision to shrink the columns as much as possible. However...
|
||||||
|
\item \emph{Smaller} columns became a routing challenge. Even with a 4-column split,
|
||||||
|
to properly connect each cell of the SRAM column, the SRAM cells themselves need
|
||||||
|
to accommodate an additional three \textsc{Wl} lines. Due to the pitch requirements
|
||||||
|
on metals three and four, this is the upper limit (for reasonably sized cells).
|
||||||
|
Alternatives included splitting the decoder into pieces, but for large numbers
|
||||||
|
of columns, this meant that the decoder signal traveled through significant amounts
|
||||||
|
of wire, and was thus slower.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
For each of the 4 64-bit columns, I attached separate read and write blocks. However,
|
||||||
|
my placement of the write block was unorthodox. I observed that, although the write block
|
||||||
|
is perfectly capable of quickly manipulating the bitlines close to it, the changes
|
||||||
|
to the wires take too long to propagate through to the end. I addressed this with two separate
|
||||||
|
changes:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item I added \textbf{additional precharge transistors} along the column, a total of 4.
|
||||||
|
Each was sized at $10\lambda$, much like the SRAM transistors themselves. When the clock
|
||||||
|
was low, these PMOS transistors became transparent, and helped precharge the bitlines faster.
|
||||||
|
Doing so helped avoid hysteresis. However, this did not help with writing during high clock,
|
||||||
|
so...
|
||||||
|
\item I also \textbf{placed the write block in the middle of the column}. This increased the distance
|
||||||
|
between my furthest SRAM cell and the read block (since the write block now contributed to wire
|
||||||
|
length). However, this made it significantly easier to drive the entire length of the wire,
|
||||||
|
which was my main bottleneck. This was because the maximum distance from the write
|
||||||
|
block to any cell in the column was halved. Since my read circuit continued to work in this
|
||||||
|
configuration, I did not place it in the middle of the column, as that would needlessly
|
||||||
|
increase the length of the wires.
|
||||||
|
\end{itemize}
|
||||||
|
%
|
||||||
|
This led to the configuration shown in Figure \ref{fig:top-design}. To simulate this design, I \textbf{tested three configurations}:
|
||||||
|
\begin{enumerate}
|
||||||
|
\item A memory cell at the very top of my column, which is the furthest spot from both the read and write.
|
||||||
|
This is the simulation in the figure.
|
||||||
|
\item A memory cell in the middle of my column, in the same place as the write block. Since the write block
|
||||||
|
has brief ``false starts'', this test was to ensure that the read block can still pick up data
|
||||||
|
despite the write block's misfires.
|
||||||
|
\item A memory cell at the very bottom of my column. This area has additional capacitance from the read block;
|
||||||
|
it thus takes longer to charge up, and tends to be the first spot where writes fail.
|
||||||
|
% circuit.
|
||||||
|
%
|
||||||
|
\end{enumerate}
|
||||||
|
I also split the wire into 4 equally-sized fragments, each with resistance $\frac{R}{4}$ and
|
||||||
|
capacitance $\frac{C}{4}$. Between each fragment, I added the aforementioned $10\lambda$ precharge
|
||||||
|
transistors, as well as 16 always-off $5\lambda$ transistors, which simulated the remaining memory cells.
|
||||||
|
I also placed \textsc{Din}, \textsc{Ad0}, and \textsc{Rwt} behind the default-sized flip-flops
|
||||||
|
attached to the clock to simulate something like a pipeline stage. My overall design is shown
|
||||||
|
in Figure \ref{fig:top-design-sim}.
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=\linewidth]{toplevel_design.png}
|
||||||
|
\caption{Top-level design for a single bit.}
|
||||||
|
\label{fig:top-design}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
My SRAM cell ended up being $30\lambda$ units tall when arrayed. With
|
||||||
|
a total of 64 cells in a single column, this led to a wire length of $1920\lambda$.
|
||||||
|
However, since my write block was now included in the column, I added another $300\lambda$
|
||||||
|
of length to this number, to a total of roughly $2200\lambda$.
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.6\linewidth]{toplevel.png}
|
||||||
|
\caption{Architecture of top-level simulation.}
|
||||||
|
\label{fig:top-design-sim}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\section{Performance Results}
|
||||||
|
I was able to clock my design at \textbf{$1.3\textit{ns}$}.
|
||||||
|
%
|
||||||
|
I realize that this isn't as fast as everyone else, but I ask that you take
|
||||||
|
into consideration the fact that \textbf{I was working with the old wire model}
|
||||||
|
until about an hour before the final due date (since I didn't know the wire model changed).
|
||||||
|
If I knew earlier, I'd have more time to optimize my design for the timings associated
|
||||||
|
with the new model.
|
||||||
|
%
|
||||||
|
Two factors lead to this upper limit.
|
||||||
|
%
|
||||||
|
\begin{itemize}
|
||||||
|
\item \textit{Write capacitance} makes it increasingly difficult to overwrite the value
|
||||||
|
in the cell. Clocking my design any faster leads my cell to \textit{almost} flip, but not resolve correctly.
|
||||||
|
I have found no way to work around these limits once my wire was properly sized, and my
|
||||||
|
write block was placed in the middle of the column.
|
||||||
|
\item \textit{Flop, decoder, and read delays} are the major limitation when both the inputs
|
||||||
|
and the outputs of the circuit are connected to flip flops. The most significant
|
||||||
|
instance of this issue is my write block: both \textsc{Din} and \textsc{Rwt} arrive
|
||||||
|
around $300\textit{ps}$ into the cycle. This means two things: a) if the previous
|
||||||
|
operation was ``read'', then the block does not start writing until halfway into
|
||||||
|
the positive phase of the clock and b) if the data being written is different
|
||||||
|
from the data in the previous cycle, for half the time, the write block will write
|
||||||
|
the old data (until the flip flop switches).
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\section{Components}
|
||||||
|
\subsection{Decoder}
|
||||||
|
\subsubsection{In My Own Words}
|
||||||
|
The decoder in this design is \textit{almost} the exact same one as we were given in lecture.
|
||||||
|
It computes all combinations of two consecutive bits using a \textsc{Nand} gate; for
|
||||||
|
each combination, there are 4 adjacent two-bit combinations,
|
||||||
|
leading to 4 \textsc{Nor} gates connected to each \textsc{Nand}. There are now
|
||||||
|
16 combinations of 4 adjacent bits; each combination of the lower 4 bits
|
||||||
|
needs to be compared with each of the 16 combinations of the upper 4 bits,
|
||||||
|
leading to 16 \textsc{Nand} gates connected to each \textsc{Nor}. This
|
||||||
|
results in 256 unique \textsc{Wl} wires. Finally, these need to be attached
|
||||||
|
to the clock, so that cells aren't open randomly. This is done using an \textsc{And}
|
||||||
|
gate (a \textsc{Nand} followed by an inverter).
|
||||||
|
|
||||||
|
I adjusted this design to account for the address signals that need to be fed
|
||||||
|
into the write blocks. Which of the read/write columns is triggered
|
||||||
|
depends on the upper two bits of the address (since we have 4 columns). I modeled
|
||||||
|
this by increasing the fanout on the first \textsc{Nand} gate from 1 to 4.
|
||||||
|
This is pessimistic; each 2-bit combination would only feed into one write block,
|
||||||
|
whose trigger gate is normally sized.
|
||||||
|
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=\linewidth]{decoder.png}
|
||||||
|
\caption{Decoder model used in project.}
|
||||||
|
\label{fig:decoder}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
% TODO: Domino logic
|
||||||
|
% TODO: More inverters?
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\subsection{Read Block}
|
||||||
|
\subsubsection{In My Own Words}
|
||||||
|
The read block uses a \emph{sense amplifier} to detect small changes on the bitlines,
|
||||||
|
which it then translates into a zero-or-one output. The changes in the wires are below
|
||||||
|
the threshold of what could be considered digital logic; all the sense amplifier
|
||||||
|
designs I've come across rely on metastability, a state in which even tiny fluctuations
|
||||||
|
can significantly alter the outcome\footnote{My favorite analogy is a pencil balanced on its tip.
|
||||||
|
Technically, it's stable; however, even a small air current -- one you can't feel -- can knock it over.}.
|
||||||
|
The \textsc{Trigger} signal, which depends on the clock and \textsc{Rwt}, puts the amplifier
|
||||||
|
into a metastable state. From there, the connected bitlines cause it to resolve one way
|
||||||
|
or another. Finally, if one of the wires resolves, a value is written into the keeper circuit
|
||||||
|
at the end, which ensures that the value that was read continues to be expressed until
|
||||||
|
the next read operation.
|
||||||
|
|
||||||
|
\subsubsection{Details}
|
||||||
|
For my read block, I used a different sense amplifier design. The design based
|
||||||
|
on the two \textsc{Nand3} gates was easy to understand and build, but was less
|
||||||
|
sensitive, and tended to behave strangely under pressure. This led to difficulties
|
||||||
|
with debugging (the output would, for instance, flip completely at certain
|
||||||
|
wire widths), and was seemingly random. Instead, I used
|
||||||
|
an \textbf{improved latch-based sense amplifier design} from \cite{210039}. % TODO: cite
|
||||||
|
The design I used is shown in Figure \ref{fig:latch-amp}.
|
||||||
|
I left it sized at $40\lambda$, since larger amplifiers seem to take longer
|
||||||
|
to trigger and exit metastability.
|
||||||
|
|
||||||
|
The read block is not a particular bottleneck in this design. The main concern
|
||||||
|
was to handle the \textbf{``false start'' activation of the write block}. Because the \textsc{Rwt}
|
||||||
|
input is behind a latch, it takes nearly $300\textit{ps}$ to pull up or down after
|
||||||
|
the initial clock. Thus, if a write occurred during a previous cycle, the write block will
|
||||||
|
activate for a short period of time before the read block does. The memory cell
|
||||||
|
will overpower this initial misfire\footnote{According to my additional simulations, this is true even when the memory cell is close to the write block.}, but in this case, both \textsc{Bt} and \textsc{Bf}
|
||||||
|
will be below \textsc{Vdd}. The ``improved sense amplifier'' seems to handle this
|
||||||
|
case better than the one based on two \textsc{Nand} gates.
|
||||||
|
|
||||||
|
The latch-induced delay in \textsc{Rwt} also causes a strange \textsc{Trigger} signal during write operations
|
||||||
|
directly following read operations. The trigger signal initially activates, putting the sense
|
||||||
|
amplifier into metastability; however, the correct \textsc{Rwt} value arrives before the
|
||||||
|
sense amp's outputs are compromised. If this became a problem, I would add an additional,
|
||||||
|
delayed clock signal \emph{after} the sense amplifier, and use an \textsc{And} gate
|
||||||
|
to delay the read block's output.
|
||||||
|
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\begin{subfigure}{.5\textwidth}
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=.7\linewidth]{amp.png}
|
||||||
|
\caption{The latch-based sense amplifier from \cite{210039}.}
|
||||||
|
\label{fig:latch-amp}
|
||||||
|
\end{subfigure}%
|
||||||
|
\begin{subfigure}{.5\textwidth}
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=.8\linewidth]{read_select.png}
|
||||||
|
\caption{The block gathering signals from the four columns.}
|
||||||
|
\label{fig:read-collect}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption{Read block schematics}
|
||||||
|
\label{fig:read}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\subsection{Write Block}
|
||||||
|
\subsubsection{In My Own Words}
|
||||||
|
The write block converts a ``data in'', or \textsc{Din}, signal
|
||||||
|
into a one-hot representation. It does so by pulling one of the bitlines high, and the other
|
||||||
|
low. Once the memory cell connects to the bitlines, it takes on the charge provided by the
|
||||||
|
write block, and is therefore overwritten. In my design, two NMOS transistors for each bitline
|
||||||
|
are used to pull down; one of the transistors is triggered by the \textsc{Din} signal (which wire
|
||||||
|
we pull down depends on the signal itself!), and the other by a combination of the clock
|
||||||
|
and \textsc{Rwt} (we don't want to touch the wires when reading!).
|
||||||
|
|
||||||
|
\subsubsection{Details}
|
||||||
|
My write block was not significantly different from the original design. Under the assumption
|
||||||
|
that data arrives first, I placed the transistors attached to \textsc{Din} and $\overline{\textsc{Din}}$
|
||||||
|
close to \textsc{Gnd}, each followed by a transistor attached to the ``write'' signal.
|
||||||
|
I also configured the write block to only precharge when the clock is low.
|
||||||
|
|
||||||
|
I experimented with making the write block pull wires up when writing (during high clock). However,
|
||||||
|
I did not find this to be of significant use. Since the wires are initially precharged,
|
||||||
|
there is no more time spent on charging them up; furthermore, the memory cell being written to
|
||||||
|
does not have enough ``strength'' to pull the wire down enough.
|
||||||
|
|
||||||
|
A curiosity of this design is that reads didn't seem to work with long clock periods. When enough
|
||||||
|
time is spent reading the wires, the memory cell in question is able to gradually exhaust the amount
|
||||||
|
of charge on one of these wires. Since the original, \textsc{Nand}-based sense amplifier required
|
||||||
|
all inputs to be high to properly function, this led to it eventually ``flipping'' and producing
|
||||||
|
the wrong output. This was only an issue above $5\textit{ns}$, and only with the original sense amplifier
|
||||||
|
design, though. I think that both Reed and
|
||||||
|
Graham experienced this occurrence -- they seemed to post very similar waveforms
|
||||||
|
to the community Discord group chat.
|
||||||
|
|
||||||
|
One thing to note about the write block is that its \textbf{clock input is deliberately delayed} compared
|
||||||
|
to the ``actual'' clock. This is because of an issue with \textsc{Din}. Since this
|
||||||
|
input is behind a latch, it takes around $300\textit{ps}$ to arrive after the rising clock
|
||||||
|
edge. If the previous value of \textsc{Din} was different than its current one, the write
|
||||||
|
block will start writing the wrong value. This will typically mean that the block cannot properly
|
||||||
|
perform the write. The delay on the clock input serves to mitigate this issue, by giving more
|
||||||
|
time for \textsc{Din} to settle before starting to write. To compensate for this delay, I sized
|
||||||
|
the write block's pull down transistors quite large ($100\lambda$), so that they can pull
|
||||||
|
the wire down, even starting $300\textit{ps}$ into the cycle. This is why the ``clock'' input
|
||||||
|
in my diagrams is colored black, unlike every other clocked component. The delay is achieved
|
||||||
|
by 6 sequenced inverters, two of which are sized 10x larger than the rest.
|
||||||
|
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.65\linewidth]{write.png}
|
||||||
|
\caption{Write block used in this project.}
|
||||||
|
\label{fig:write}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\subsection{Memory Cell}
|
||||||
|
\subsubsection{In My Own Words}
|
||||||
|
The memory cell consists of two cross coupled inverters whose outputs
|
||||||
|
are disconnected from the bitlines by two additional nMOS transistors. When disconnected,
|
||||||
|
this cell reliably holds its value; one inverter's output turns off the other, and symmetrically,
|
||||||
|
the ``off'' output of that other inverter keeps the first one on. However, this cell is pretty
|
||||||
|
small; all of its transistors have size $5\lambda$, which is the smallest size that can be properly
|
||||||
|
connected with a standard $2\lambda\times2\lambda$ via. Thus, when the ``write line'' (signal
|
||||||
|
connected to the gates of the two outside transistors) is asserted, the charge from the
|
||||||
|
surrounding bitlines can easily overpower the cell, causing it to switch to a different value.
|
||||||
|
|
||||||
|
\subsubsection{Details}
|
||||||
|
There are a few notable things about my cell design. Even though it was recommended that we only
|
||||||
|
use metals one and two for the internal wiring, I went up to metal three for cross-connecting
|
||||||
|
the two internal inverters. This was the only way I found to keep the height of the cell to
|
||||||
|
a minimum. This limited my routing options somewhat; to compensate, I also used metal three for
|
||||||
|
the vertical wires, \textsc{Bt} and \textsc{Bf}. This allowed me to use metal four for the
|
||||||
|
\textsc{Wl} (access) signal. Since this was the only use of metal four, I had enough free
|
||||||
|
room to route three additional \textsc{Wl} signals to the remaining three columns.
|
||||||
|
|
||||||
|
My general principle for designing the layout was that, in a 12-bit, 4-column design, \textbf{a single
|
||||||
|
unit of height costs as much as 64 units of width}. Thus, I was fairly liberal with my layout's
|
||||||
|
width, but made sure to minimize the height of the design. The most significant bottleneck
|
||||||
|
was the gate oxide ``poking out'' of the ends of the design. In total, I was able to achieve
|
||||||
|
a height of $30\lambda$ when arrayed.
|
||||||
|
|
||||||
|
Other designs with smaller height were possible, but I found them undesirable. For instance,
|
||||||
|
Reed's now-famous design used a significant amount of high-level metals to achieve its tiny,
|
||||||
|
almost square area. This, however, makes routing \textsc{Wl} signals fairly complicated. They either
|
||||||
|
need to go to yet another layer of metal, or the decoder needs to be split into 4 pieces. The former
|
||||||
|
is undesirable as per the requirements for this assignment; the latter incurs the cost of additional
|
||||||
|
decoder hardware between columns, thereby significantly increasing the wire length and signal
|
||||||
|
delays. Since delays incurred by the flip flops and other signals are already becoming
|
||||||
|
a significant factor in my design, I thought it would be best to avoid such delays.
|
||||||
|
|
||||||
|
Other ideas I am aware of include putting \textit{all} the transistors in a single, horizontal line.
|
||||||
|
While this certainly succeeds at reducing the height, it incurs all the same issues described
|
||||||
|
above - it becomes nigh impossible to wire further \textsc{Wl} lines through each column,
|
||||||
|
unless the decoder is split into bits, in which case the width of the entire assembly drastically increases,
|
||||||
|
slowing down all signals.
|
||||||
|
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.5\linewidth]{layout_single.png}
|
||||||
|
\caption{Electric layout for a single cell.}
|
||||||
|
\label{fig:layout-cell}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
My basic cell is shown in Figure \ref{fig:layout-cell}. The arrayed version (in Figure \ref{fig:layout-arrayed})
|
||||||
|
merits additional explanation. In my earlier description of the overall design, I mentioned
|
||||||
|
that I have precharge PMOS transistors. I have integrated these into my layout to accurately model
|
||||||
|
my design. I also made them $10\lambda$ wide, since this is, at the time of writing,
|
||||||
|
the size of my 4 precharge transistors. In the bird's eye view (Figure \ref{fig:layout-arrayed-far}),
|
||||||
|
three things can be observed:
|
||||||
|
\begin{itemize}
|
||||||
|
\item \textit{Additional vertical line:} This line represents the clock signal,
|
||||||
|
which must be fed to the precharge transistors. In the full design, there would
|
||||||
|
be 5 clock lines (3 shared, plus 1 on each of the 2 outer sides).
|
||||||
|
\item \textit{``Empty'' space between nodes:} I left this space because I was not sure
|
||||||
|
how wide I would end up making my \textsc{Bt} and \textsc{Bf} wires. I have measured
|
||||||
|
the distance to ensure that the design will remain DRC clean with up to \textbf{$8\lambda$-wide bitlines}.
|
||||||
|
This appears to be a sweet spot for my design, anyway.
|
||||||
|
\item \textit{Moved well contacts:} I have moved my well contacts to the region between
|
||||||
|
two columns. By extending the N- and P-wells to this area, I was able to
|
||||||
|
share a single contact between two cells, leaving room for precharge transistors
|
||||||
|
on both sides of the cell. This was partially inspired by Reed's compact cell design,
|
||||||
|
which shared a single contact between two cells\footnote{I am operating based on your
|
||||||
|
comment that well contacts for every cell are significantly overkill.}.
|
||||||
|
\end{itemize}
|
||||||
|
Figure \ref{fig:layout-arrayed-close} shows a closer view of the design. Due to the additional
|
||||||
|
space incurred, an entire column is approximately $100\lambda$ wide.
|
||||||
|
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\begin{subfigure}{.5\textwidth}
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=.7\linewidth]{layout_arrayed.png}
|
||||||
|
\caption{Bird's eye view of the arrayed SRAM cells.}
|
||||||
|
\label{fig:layout-arrayed-far}
|
||||||
|
\end{subfigure}%
|
||||||
|
\begin{subfigure}{.5\textwidth}
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=.8\linewidth]{layout_arrayed_closeup.png}
|
||||||
|
\caption{Close-up of the arrayed SRAM cells.}
|
||||||
|
\label{fig:layout-arrayed-close}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption{Layout of the arrayed SRAM cells.}
|
||||||
|
\label{fig:layout-arrayed}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\section{Further Design Ideas}
|
||||||
|
I discovered -- from other people in the class -- that an 8-column design was plausible.
|
||||||
|
Unfortunately, I was only convinced a day or so before the project was due, which did not give me
|
||||||
|
enough time to redesign my SRAM. I have seen students successfully using
|
||||||
|
the 8-column design by sharing \textsc{Wl} wires for each ``row'', and using
|
||||||
|
the remaining 3 bits to enable and disable the write block. Since reading does
|
||||||
|
not change the cell value, this is a viable approach; all 8 columns would ``read''
|
||||||
|
(except during writing, in which 7 columns would read and 1 would write). As
|
||||||
|
long as a proper address selection mechanism is implemented into the read collector
|
||||||
|
circuit (which at present cannot handle concurrent reads), this would work just
|
||||||
|
fine, albeit at the expense of added power consumption (from draining and re-charging
|
||||||
|
7 extra wires). This design, combined with my idea of placing the write block
|
||||||
|
in the middle of the column, can lead to very short effective wire lengths. If
|
||||||
|
I were to approach this project again, that's what I would try.
|
||||||
|
|
||||||
|
\section{Acknowledgements}
|
||||||
|
Reed's aforementioned idea of sharing well contacts between adjacent cells
|
||||||
|
played a part in my design. Also, without the other students in the class
|
||||||
|
Discord, I would not have known to use the ``better'' wire model at all.
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\bibliographystyle{unsrt}
|
||||||
|
\bibliography{bibliography}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
70
final/testBuffer.cir
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
|
||||||
|
|
||||||
|
* File includes subcircuits and technology definitions
|
||||||
|
.include ./SRAM_bits.cir
|
||||||
|
|
||||||
|
|
||||||
|
*this cell emulates load from SRAM cells,
|
||||||
|
* Number refers to the load from that number of cells
|
||||||
|
.subckt memLoad ttt fff number=254
|
||||||
|
Xnt ttt gnd dead nn ww='number*5'
|
||||||
|
Xnf fff gnd dead nn ww='number*5'
|
||||||
|
.ends memLoad
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
|
||||||
|
* Parameters
|
||||||
|
.global gnd vdd
|
||||||
|
.param gnd=0
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
.param per = 1.3ns
|
||||||
|
.param dataLead=per*0.1
|
||||||
|
.param lw=2200
|
||||||
|
.param wirew=14
|
||||||
|
|
||||||
|
vdd vdd 0 'supply'
|
||||||
|
|
||||||
|
Xclok clk dat1 period='per' start='per+dataLead' total=1 duty=0.5 sz=300
|
||||||
|
Xad ad dat1 period='per' start='per' total=1 duty=0.5 sz=300
|
||||||
|
Xrdwr rdw dat1 period='per' start='2*per' total=2 duty=1 sz=300
|
||||||
|
Xdii din dat1 period='per' start='per' total=4 duty=2 sz=300
|
||||||
|
|
||||||
|
Xinv1 clkb1 clk inv
|
||||||
|
Xinv2 clkb2 clkb1 inv
|
||||||
|
Xinv3 clkb3 clkb2 inv
|
||||||
|
Xinv4 clkb4 clkb3 inv size='300'
|
||||||
|
Xinv5 clkb5 clkb4 inv
|
||||||
|
Xinv6 clkb6 clkb5 inv size='300'
|
||||||
|
|
||||||
|
* Flop taking ad in and producing adf, clocked by clk.
* Instance renamed from Xad: that name is already taken by the
* "Xad ad dat1 ..." stimulus instance earlier in this netlist, and
* instance names must be unique. Name follows Xdinff / Xrdwff.
Xadff adf ad clk flop
|
||||||
|
Xdinff dinf din clk flop
|
||||||
|
Xrdwff rdwf rdw clk flop
|
||||||
|
Xrotff dotf dot clk flop
|
||||||
|
Xdec choose adf clk decModel
|
||||||
|
|
||||||
|
Xwr bt3 bf3 dinf rdwf adf clkb6 iWrite1
|
||||||
|
Xw1 bt1 bt2 bf1 bf2 clk wire_precharge len='lw/4' wid='wirew'
|
||||||
|
Xmd1 bt2 bf2 memLoad number=15
|
||||||
|
Xw2 bt2 bt3 bf2 bf3 clk wire_precharge len='lw/4' wid='wirew'
|
||||||
|
Xmd2 bt3 bf3 memLoad number=16
|
||||||
|
Xw3 bt3 bt4 bf3 bf4 clk wire_precharge len='lw/4' wid='wirew'
|
||||||
|
Xmd3 bt4 bf4 memLoad number=16
|
||||||
|
Xw4 bt4 btt bf4 bff clk wire_precharge len='lw/4' wid='wirew'
|
||||||
|
* NOTE(review): Xmd4 is attached to bt3/bf3, the same node pair as Xmd2,
* whereas Xmd1..Xmd3 each sit on the second node pair of the preceding
* Xw* segment. Confirm whether btt/bff (second node pair of Xw4) was
* intended here.
Xmd4 bt3 bf3 memLoad number =16
|
||||||
|
* Xla bt1 bf1 choose mem1
|
||||||
|
* Xla bt3 bf3 choose mem1
|
||||||
|
Xla btt bff choose mem1
|
||||||
|
Xrd btt bff set rst rdwf clk choose iReadSub
|
||||||
|
Xrc dot set rst vdd vdd vdd vdd vdd vdd readCollect
|
||||||
|
|
||||||
|
.ic V(la:tt)=0 V(la:ff)=1
|
||||||
|
.ic V(bt2)=1
|
||||||
|
.tran 1p 'per*20'
|
||||||
|
* dot_delay: time from the 2nd rising crossing of V(clk) to the 1st rising
* crossing of V(dot), both taken at 80% of supply.
* The thresholds are quoted so they are evaluated as parameter expressions
* (like the other expressions in this file) and keep tracking supply if it
* is changed.
.meas tran dot_delay trig V(clk) val='0.8*supply' rise=2 targ V(dot) val='0.8*supply' rise=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
47
final/testDecoder.cir
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
|
||||||
|
|
||||||
|
* File includes subcircuits and technology definitions
|
||||||
|
.include ./SRAM_bits.cir
|
||||||
|
|
||||||
|
|
||||||
|
*this cell emulates load from SRAM cells,
|
||||||
|
* Number refers to the load from that number of cells
|
||||||
|
.subckt memLoad ttt fff number=254
|
||||||
|
Xnt ttt gnd dead nn ww='number*5'
|
||||||
|
Xnf fff gnd dead nn ww='number*5'
|
||||||
|
.ends memLoad
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
|
||||||
|
* Parameters
|
||||||
|
.global gnd vdd
|
||||||
|
.param gnd=0
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
.param per = 5ns
|
||||||
|
.param lw=500
|
||||||
|
.param wirew=3
|
||||||
|
|
||||||
|
|
||||||
|
*DC supplies
|
||||||
|
vdd vdd 0 'supply'
|
||||||
|
Xclok clk dat1 period='per' start='per' total=1 duty=0.5 sz=120
|
||||||
|
|
||||||
|
Xbit ad0 dat1 period='per' start='0.5*per' total=3 duty=1
|
||||||
|
Xde ope ad0 clk decModel size=20
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.tran 1p 25n
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
61
final/testMem.cir
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
|
||||||
|
|
||||||
|
* File includes subcircuits and technology definitions
|
||||||
|
.include ./SRAM_bits.cir
|
||||||
|
|
||||||
|
|
||||||
|
*this cell emulates load from SRAM cells,
|
||||||
|
* Number refers to the load from that number of cells
|
||||||
|
.subckt memLoad ttt fff number=254
|
||||||
|
Xnt ttt gnd dead nn ww='number*5'
|
||||||
|
Xnf fff gnd dead nn ww='number*5'
|
||||||
|
.ends memLoad
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
|
||||||
|
* Parameters
|
||||||
|
.global gnd vdd
|
||||||
|
.param gnd=0
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
.param per = 5ns
|
||||||
|
.param lw=100
|
||||||
|
.param wirew=3
|
||||||
|
|
||||||
|
|
||||||
|
*DC supplies
|
||||||
|
vdd vdd 0 'supply'
|
||||||
|
Xclok clk dat1 period='0.5*per' total=1 duty=0.5 sz=120
|
||||||
|
Xdii dii dat1 period='per' start='per' total=3 duty=1
|
||||||
|
Xbit ad0 dat1 period='per' start='0.5*per' total=3 duty=1
|
||||||
|
Xde ope ad0 clk decModel size=20
|
||||||
|
|
||||||
|
* hardwire rdw signal to gnd
|
||||||
|
Xwr bt0 bf0 dii gnd clk write1
|
||||||
|
Xw0 bt0 bt1 bf0 bf1 wire_dual len='lw' wid='wirew'
|
||||||
|
|
||||||
|
* Place memory cell at end of wire
|
||||||
|
* First make sure it works with short wire and few memory cells
|
||||||
|
* View on plotter
|
||||||
|
*v(ope), v(dii)
|
||||||
|
*v(la:ff) v(la:tt)
|
||||||
|
*v(bf1) and v(bt1)
|
||||||
|
Xla bt1 bf1 ope mem1 m=1
|
||||||
|
Xmd bt1 bf1 memLoad number =254
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*14.462274109131130
|
||||||
|
|
||||||
|
|
||||||
|
.tran 1p 50n
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
61
final/testRead.cir
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
|
||||||
|
|
||||||
|
* File includes subcircuits and technology definitions
|
||||||
|
.include ./SRAM_bits.cir
|
||||||
|
|
||||||
|
|
||||||
|
*this cell emulates load from SRAM cells,
|
||||||
|
* Number refers to the load from that number of cells
|
||||||
|
.subckt memLoad ttt fff number=254
|
||||||
|
Xnt ttt gnd dead nn ww='number*5'
|
||||||
|
Xnf fff gnd dead nn ww='number*5'
|
||||||
|
.ends memLoad
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
|
||||||
|
* Parameters
|
||||||
|
.global gnd vdd
|
||||||
|
.param gnd=0
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
.param per = 3n
|
||||||
|
.param lw=5000
|
||||||
|
.param wirew=3
|
||||||
|
|
||||||
|
|
||||||
|
*DC supplies
|
||||||
|
vdd vdd 0 'supply'
|
||||||
|
|
||||||
|
Xclok clk dat1 period='per' start='per' total=1 duty=0.5 sz=120
|
||||||
|
Xrdwr rdw dat1 period='per' start='per' total=2 duty=1
|
||||||
|
Xdii dii dat1 period='per' start='per' total=3 duty=1
|
||||||
|
|
||||||
|
|
||||||
|
* vary
|
||||||
|
.param dip=0.05
|
||||||
|
* Stimulus on bt2: PULSE(v1 v2 delay rise fall width period).
* Starts at supply and drops by dip after a delay of per.
* The two level expressions are separated by whitespace so they are read
* as two distinct values.
Vt bt2 0 PULSE('supply' 'supply-dip' 'per' 10p 10p '2*per' '4*per')
|
||||||
|
* Stimulus on bf2: PULSE(v1 v2 delay rise fall width period).
* Starts at supply-dip and rises to supply after a delay of per.
* The two level expressions are separated by whitespace so they are read
* as two distinct values.
Vf bf2 0 PULSE('supply-dip' 'supply' 'per' 10p 10p '2*per' '4*per')
|
||||||
|
|
||||||
|
Xbit ad0 dat1 period='per' start='0.5*per' total=3 duty=1
|
||||||
|
Xde ope ad0 clk decModel size=20
|
||||||
|
|
||||||
|
|
||||||
|
* Xrd bt2 bf2 dot vdd clk read1
|
||||||
|
Xrd bt2 bf2 set rst vdd clk readSub
|
||||||
|
|
||||||
|
|
||||||
|
* NOTE(review): no instance in this file connects to a node named dot as
* written -- the only Xrd line that uses dot (read1) is commented out, and
* the active readSub instance drives set/rst instead. Confirm whether this
* initial condition is still needed.
.ic v(dot)=0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.tran 1p 50n
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
58
final/testSRAM.cir
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
|
||||||
|
|
||||||
|
* File includes subcircuits and technology definitions
|
||||||
|
.include ./SRAM_bits.cir
|
||||||
|
|
||||||
|
|
||||||
|
*this cell emulates load from SRAM cells,
|
||||||
|
* Number refers to the load from that number of cells
|
||||||
|
.subckt memLoad ttt fff number=254
|
||||||
|
Xnt ttt gnd dead nn ww='number*5'
|
||||||
|
Xnf fff gnd dead nn ww='number*5'
|
||||||
|
.ends memLoad
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
|
||||||
|
* Parameters
|
||||||
|
.global gnd vdd
|
||||||
|
.param gnd=0
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
.param per = 100ns
|
||||||
|
.param lw=500
|
||||||
|
.param wirew=3
|
||||||
|
|
||||||
|
|
||||||
|
*DC supplies
|
||||||
|
vdd vdd 0 'supply'
|
||||||
|
Xclok clk dat1 period='0.5*per' total=1 duty=0.5 sz=120
|
||||||
|
Xrdwr rdw dat1 period='per' start='per' total=2 duty=1
|
||||||
|
*Vrdw rdw 0 'supply'
|
||||||
|
Xbit ad0 dat1 period='per' start='per' total=3 duty=1
|
||||||
|
Xdii dii dat1 period='4*per' total=1 duty=0.5 sz=120
|
||||||
|
Xacc acc dat1 period='per' start='per+10ps' total=2 duty=1
|
||||||
|
|
||||||
|
*
|
||||||
|
Xwr bt0 bf0 dii rdw clk write1
|
||||||
|
Xw0 bt0 bt1 bf0 bf1 wire_dual len='lw' wid='wirew'
|
||||||
|
Xla bt1 bf1 ope mem1
|
||||||
|
Xmd bt1 bf1 memLoad number =1
|
||||||
|
|
||||||
|
Xw1 bt1 bt2 bf1 bf2 wire_dual len='lw' wid='wirew'
|
||||||
|
Xrd bt2 bf2 dot rdw clk read1
|
||||||
|
|
||||||
|
Xde ope ad0 clk decModel size=10
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.tran 1ps 1600ns
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
51
final/testWrite.cir
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
|
||||||
|
|
||||||
|
* File includes subcircuits and technology definitions
|
||||||
|
.include ./SRAM_bits.cir
|
||||||
|
|
||||||
|
|
||||||
|
*this cell emulates load from SRAM cells,
|
||||||
|
* Number refers to the load from that number of cells
|
||||||
|
.subckt memLoad ttt fff number=254
|
||||||
|
Xnt ttt gnd dead nn ww='number*5'
|
||||||
|
Xnf fff gnd dead nn ww='number*5'
|
||||||
|
.ends memLoad
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
|
||||||
|
* Parameters
|
||||||
|
.global gnd vdd
|
||||||
|
.param gnd=0
|
||||||
|
|
||||||
|
|
||||||
|
*********begin: topLevel*****
|
||||||
|
.param per = 1ns
|
||||||
|
.param lw=500
|
||||||
|
.param wirew=3
|
||||||
|
|
||||||
|
|
||||||
|
*DC supplies
|
||||||
|
|
||||||
|
* make sure data signal is set up before clock signal triggers write
|
||||||
|
* possible NOR rdw and Clk, and then maybe delay clk?
|
||||||
|
* connect PMOS transistors to output of NOR gate, not directly to clk
|
||||||
|
|
||||||
|
|
||||||
|
vdd vdd 0 'supply'
|
||||||
|
Xclok clk dat1 period='per' start='per' total=1 duty=0.5 sz=120
|
||||||
|
Xrdwr rdw dat1 period='per' start='per' total=2 duty=1
|
||||||
|
Xdii dii dat1 period='per' start='per+0' total=3 duty=1
|
||||||
|
|
||||||
|
Xwr bt0 bf0 dii gnd clk write1
|
||||||
|
|
||||||
|
|
||||||
|
.tran 1p 15n
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
13
final/todo.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
* [x] Figure out the weird opAmp behavior
|
||||||
|
* [x] Design cell with strict metal policies
|
||||||
|
* [x] Add precharger version of memory cell (or explain how they compose)
|
||||||
|
* [x] Test cell in the _middle_.
|
||||||
|
* [x] Walk through the consequences of the read/write block being in the middle.
|
||||||
|
* [x] Figure out what to do with flopped write block.
|
||||||
|
* [x] Test data close to write block (it pulls up past clock low!)
|
||||||
|
* [ ] Drive wires to zero?
|
||||||
|
* [x] Add missing well connection in layout
|
||||||
|
* [x] Make sure width isn't too horrible
|
||||||
|
* [ ] Model additional delay for read/write block select?
|
||||||
|
* [x] Model worst case of decoder
|
||||||
|
* [x] Cite [this](https://ieeexplore.ieee.org/document/210039)
|
||||||
BIN
final/toplevel.png
Normal file
|
After Width: | Height: | Size: 149 KiB |
BIN
final/toplevel_design.png
Normal file
|
After Width: | Height: | Size: 192 KiB |
BIN
final/write.png
Normal file
|
After Width: | Height: | Size: 13 KiB |