User:Myask/MyaGrafx: Difference between revisions

From NESdev Wiki
Jump to navigationJump to search
(→‎Basic: increase verilogness)
m (→‎Basic Implementation: oops, CPUA15 isn't simply available)
 
(11 intermediate revisions by the same user not shown)
Line 2: Line 2:
==Sources==
==Sources==
[[PPU rendering]],
[[PPU rendering]],
[[Cartridge connector]]
[[Cartridge connector]],
[[6264]]
 
==Basic==
==Basic==
As the cart only has CIRAM A10 piped through, one can't just remap part of CIRAM to supply the 256 bytes of attribute one needs for page 0.
As the cart only has CIRAM A10 piped through, one can't just remap part of CIRAM to supply the 256 bytes of attribute one needs for page 0.


So, for the basicmost case... PPU: --10 NT11 11yy yxxx is where attributes normally reside.  To our advantage, the nametable byte precedes the attribute table byte. This allows at least the 8x8 tile to be detected: NT-fetch is PPU: --10 NTYY YyyX XXxx. (A0, A5 are what we want to know, as they're not in the AT fetch; A1,6 are the 16-px accounted for by which two bits within an attribute byte.). For this simple one, instead of storing the attribute data per-tile, it's arranged as usual: four tiles in a 32x32 are specified each byte, just that the game pak will return different byte depending on the X and Y tile evenness.
So, for the basicmost case... PPU: --10 NT11 11yy yxxx is where attributes normally reside.  To our advantage, the nametable byte precedes the attribute table byte. This allows at least the 8x8 tile to be detected: NT-fetch is PPU: --10 NTYY YyyX XXxx. (A0, A5 are what we want to know, as they're not in the AT fetch; A1,6 are the 16-px accounted for by which two bits within an attribute byte.). For this simple one, instead of storing the attribute data per-tile, it's arranged as usual: four tiles in a 32x32 are specified each byte, just that the game pak will return different byte depending on the X and Y tile evenness.
----
==Basic Implementation==
  //pseudo-verilog
  /* NES 8x8(8x1?)-Attribute graphics mapper
   
    Created by "Myask", April 2015
  always @(negedge PPU_/RD)
    8x8-mapper:
  if (PPU_A[13] & (~& PPUA[9:8])) begin //NT-fetch
      Accepts input at ppu_a == 14'b__11_NTx0_xxxx_xxxx
    AT_8X <= PPUA[0];
        (ppu_a[8] == 0 to avoid conflict with palettes)
    AT_8Y <= PPUA[5];
        where ppu_a[7:4] are the 16px-tile level Y
  end  
        and ppu_a[3:0] are the 16px-tile level X.
        Bitpairs are organised as normal, just at 16x16 instead.
always @(posedge M2)
      (genuinely indifferent to ppu_a[9] on write.)
  if (~CPU_W & CPU_A[15] == 1) //only have one visible register bit, so little decoding necessary: CPU$8xxx.
                  and cpu_a == 16'b1xxx_xxxx_xxxx_xxxx
    Mya_ATRAM_Enable <= CPU_D[0];  
        to enable,    cpu_d ==  8'bxxxx_xxx1
        to disable,  cpu_d == 8'bxxxx_xxx0
assign Mya_ATRAM_A[5:0] = PPU_A[5:0]; //don't really need to go through CPLD?
      Outputs at      ppu_a == 14'b__10_NT11_11YY_YXXX
assign Mya_ATRAM_A[6] = (PPU_A[12] ? PPU_A[6]: AT_8X); //write to PPU  0011 NT*0 YXyy yxxx, A8=0 is to dodge palettes.
        using the previous nametable fetch to select which of four possible
assign Mya_ATRAM_A[7] = (PPU_A[12] ? PPU_A[7]: AT_8Y);
        attribute byte to return.
assign Mya_ATRAM_A[9:8] = PPU_A[11:10]; //NT-select. Also don't need to be routed through.
  */
assign Mya_ATRAM_WR = PPU_WR & (&PPU_A[13:12]) & ~PPU_A[8];
  module mya_at_mapper{
assign UL = ~(AT_8X | AT_8Y); //UpperLeft: replace with 0 to not bother using CIRAM for any attributes at all
    system_clk,
assign Mya_ATRAM_CE = PPU_A[13] & (PPU_A[12] ? (~PPU_A[8]) : PPU_A[8]) & PPU_A[9] & ~UL & Mya_ATRAM_Enable;
    m2,
assign CIRAM_CE = PPUA[13]    //NT/AT only
    cpu_rw, //high=r
  & (~PPUA[12] | (& PPUA[11:8]))  //disable for 30xx-3Exx to allow the writes to cart
    cpu_a,
  & ((~&PPUA[9:6]) ? UL : 1); //and enable for the UL AT fetches and all NT fetches.
    cpu_d,
assign Mya_ATRAM_D[7:0] = PPU_D[7:0]
    romsel_n, //traditionally _n signifies active-low
----
    irq_n,
    exp,
    ppu_wr_n,
    ppu_rd_n,
    ppu_a,
    ppu_d,
    ppu_a13_n,
    ciram_ce_n,
    ciram_a10,
    mya_atram_a,
    mya_atram_d,
    mya_atram_we_n,
    mya_atram_oe_n,
    mya_atram_cs1_n,
    mya_atram_cs2
  };
  //first, cart-edge signals
  input        system_clk;
  input        m2;
  input        cpu_rw;
  input [14:0] cpu_a;
  inout  [7:0] cpu_d;
  input        romsel_n;
 
  output        irq_n;
 
  inout  [9:0] exp;
 
  input        ppu_wr_n;
  input        ppu_rd_n;
  input [13:0] ppu_a;
  inout  [7:0] ppu_d;
  input        ppu_a13_n;
 
  output        ciram_ce_n;
  output        ciram_a10;
  //then cart-internals: first, the (probably 6264)
  output [12:0] mya_atram_a;
  inout  [7:0] mya_atram_d;
  output        mya_atram_we_n;
  output        mya_atram_oe_n;
  output        mya_atram_cs1_n;
  output        mya_atram_cs2;
  //then make all the variable names (sigh) Cart-external:
  wire          system_clk;
  wire          m2;
  wire          cpu_rw;
  wire  [14:0] cpu_a;
  wire    [7:0] cpu_d;
  wire          romsel_n;
  wire          irq_n;
  wire    [9:0] exp;
 
  wire          ppu_wr_n;
  wire          ppu_rd_n;
  wire  [13:0] ppu_a;
  wire    [7:0] ppu_d;
  wire          ppu_a13_n;
 
  wire          ciram_ce_n;
  wire          ciram_a10;
  //cart-internal:
  wire  [12:0] mya_atram_a;
  wire    [7:0] mya_atram_d;
  wire          mya_atram_we_n;
  wire          mya_atram_oe_n;
  wire          mya_atram_cs1_n;
  reg          mya_atram_cs2;
  //chip-internal:
  reg    [1:0] at_8x;
  reg    [1:0] at_8y;
  wire          ul;
  //reg    [2:0] at_finey;
 
  always @(negedge ppu_rd_n)  
    if (ppu_a[13] & (~& ppu_a[9:8])) begin //trap nt-fetch
      at_8x[1:0] <= ppu_a[1:0]; //and store the 8&16px-level X
      at_8y[1:0] <= ppu_a[6:5]; //and Y-coordinates.
    end //trap nt-fetch
 
  always @(posedge m2)
    if (~cpu_rw & ~romsel_n)
      mya_atram_cs2 <= cpu_d[0];
  //only have one visible register bit, so little decoding necessary: CPU$8xxx.
 
  assign mya_atram_a[7:5] = ppu_a[5:3];  
  assign mya_atram_a[3:1] = ppu_a[2:0];
  assign mya_atram_a[0] = (ppu_a[12] ? ppu_a[6]: at_8x[1]);  
  assign mya_atram_a[4] = (ppu_a[12] ? ppu_a[7]: at_8y[1]);
  assign mya_atram_a[9:8] = ppu_a[11:10];  
  //ppu_a[11:10] = NT-select. also don't need to be routed through CPLD
  assign mya_atram_a[12:10] = 3'b000; //Doing fineY-AT'd need a new write port
  assign ul = 1'b0; //~(at_8x | at_8y);  
  //assign ul with 0 to not bother using ciram for any attributes at all
  assign mya_atram_oe_n = ~( ppu_a[13]
    & (& ppu_a[9:6]) //Nametable: @PPU 16'b0010_xx11_11xx_xxxx
    & ~ul & ~ppu_rd_n);
  assign mya_atram_we_n = ~( ~ppu_wr_n & (&ppu_a[13:12]) & ~ppu_a[8] );
    //Write-port: @PPU 14'b11_NT?0_YYYY_XXXX: ~a[8] to avoid palettespace
  assign mya_atram_cs1_n = 1'b0;
  //assign mya_atram_cs2 = mya_atram_enable;
  assign ciram_ce_n = ~(ppu_a[13]    //nt/at only
    & (~ppu_a[12]) // | (& ppu_a[11:8]))  (palette ram cares not for CIRAM/CE
    & ((~& ppu_a[9:6]) ? ul : 1'b1) );//enable for the ul AT- and all NT- fetches.
 
  always @ (mya_atram_d or mya_atram_oe_n) begin//ppu_d mux logic
    //note: may need additional always@ args, (and combining)
    //as-is might do stupid things like making ppu_d a latch?
    if (mya_atram_oe_n) //that is, when not reading...
      ppu_d[7:0] = mya_atram_d[7:0];
    else begin //when reading by AT, mux out the correct bitpair
      case ({at_8y[0],at_8x[0]})
        2'b00: ppu_d = {4{mya_atram_d[1:0]}}; //upper-left
        2'b01: ppu_d = {4{mya_atram_d[3:2]}}; //upper-right
        2'b10: ppu_d = {4{mya_atram_d[5:4]}}; //lower-left
        2'b11: ppu_d = {4{mya_atram_d[7:6]}}; //lower-right
        default: ppu_d[7:0] = mya_atram_d[7:0];
      endcase
    end //
  end //ppu_d mux logic
 
  endmodule //mya_at_mapper


==Less basic==
==Less basic==
This mode of writing does not work if we want to extend to 8x1 attributes; there are three bits of attribute space to add and we only have three choices (00, 01, 10) of PPUADDR 8-9 for NT3. Even in two-screen mirroring, there is a small problem: but as we are relying on CIRAM for the first sliver of each section, one does not need to have duplicate write-access to those. One could remap $38** to what would have been in $3F**. Four-screen proves more problematic. Also problematic is determining the fine-Y. Brute-force method is to snoop for writes to PPU_SCROLL, as well as reads from PPU_STATUS and writes to PPU_ADDR to know the high-byte latch status. If we don't want to allow raster effects, which seems like a short-sighted decision, perhaps one could somehow divine where to begin from the dummy-fetch prerender scanline. In any case, it would basically require a scanline counter, at which point one would just add a few more bits of state to get a useful scanline-type interrupt, though if it shares the low three bits with the rendering portion it would be more of a NT-relative Y-coordinate interrupt...
This mode of writing does not work if we want to extend to 8x1 attributes; there are three bits of attribute space to add and we only have three choices (00, 01, 10) of PPUADDR 8-9 for NT3. Even in two-screen mirroring, there is a small problem: but as we are relying on CIRAM for the first sliver of each section, one does not need to have duplicate write-access to those. One could remap $38** to what would have been in $3F**. Four-screen proves more problematic. Also problematic is determining the fine-Y. Brute-force method is to snoop for writes to PPU_SCROLL, as well as reads from PPU_STATUS and writes to PPU_ADDR to know the high-byte latch status. If we don't want to allow raster effects, which seems like a short-sighted decision, perhaps one could somehow divine where to begin from the dummy-fetch prerender scanline. In any case, it would basically require a scanline counter, at which point one would just add a few more bits of state to get a useful scanline-type interrupt, though if it shares the low three bits with the rendering portion it would be more of a NT-relative Y-coordinate interrupt...

Latest revision as of 09:50, 5 May 2015

"Perhaps someone should mock up a specification for a CPLD that only provides 8x8 attributes and nothing else."--Tepples

Sources

PPU rendering, Cartridge connector, 6264

Basic

As the cart only has CIRAM A10 piped through, one can't just remap part of CIRAM to supply the 256 bytes of attribute one needs for page 0.

So, for the most basic case... PPU: --10 NT11 11yy yxxx is where attributes normally reside. To our advantage, the nametable byte fetch precedes the attribute table byte fetch. This allows at least the 8x8 tile to be detected: the NT fetch is PPU: --10 NTYY YyyX XXxx. (A0 and A5 are what we want to know, as they're not in the AT fetch; A1 and A6 are the 16-px position, already accounted for by which two bits within an attribute byte are used.) For this simple one, instead of storing the attribute data per-tile, it's arranged as usual: four tiles in a 32x32 area are specified by each byte, except that the Game Pak returns a different byte depending on whether the tile's X and Y coordinates are even or odd.

Basic Implementation

 /* NES 8x8(8x1?)-Attribute graphics mapper
   Created by "Myask", April 2015
   8x8-mapper:
     Accepts input at ppu_a == 14'b__11_NTx0_xxxx_xxxx 
        (ppu_a[8] == 0 to avoid conflict with palettes)
        where, for the 4-bit 16px-tile coordinates Y[3:0] and X[3:0],
        ppu_a[7:0] == {Y[0], X[0], Y[3:1], X[3:1]}
        (this is the layout the mya_atram_a address logic actually decodes).
        Bitpairs are organised as normal, just at 16x16 instead.
     (genuinely indifferent to ppu_a[9] on write.)
                  and cpu_a == 16'b1xxx_xxxx_xxxx_xxxx
        to enable,    cpu_d ==  8'bxxxx_xxx1
        to disable,   cpu_d ==  8'bxxxx_xxx0
     Outputs at       ppu_a == 14'b__10_NT11_11YY_YXXX
       using the previous nametable fetch to select which of four possible
       attribute byte to return.
 */
 // mya_at_mapper: CPLD glue between the NES cart edge and a small SRAM
 // ("ATRAM", probably a 6264) that supplies attribute data at 8x8-pixel
 // granularity.
 //
 // Operation, as implemented below:
 //  * Every nametable (NT) fetch latches the tile's low X/Y coordinate bits.
 //  * On the following attribute-table (AT) fetch, CIRAM is disabled, ATRAM is
 //    read at {NT, Y32[2:0], Y16, X32[2:0], X16}, and the bit-pair selected by
 //    the latched 8px X/Y bits is replicated into all four bit-pair positions
 //    of ppu_d, so the PPU picks the right palette whichever pair it uses.
 //  * PPU writes to $3000-$3EFF with A8 == 0 go to ATRAM. The write address is
 //    decoded as ppu_a[7:0] == {Y16, X16, Y32[2:0], X32[2:0]}, i.e. the same
 //    RAM cell the read path addresses for that 16x16 area.
 //  * Any CPU write to $8000-$FFFF sets the ATRAM CS2 (enable) pin from D0.
 //
 // NOTE(review): mya_atram_cs2 has no reset, so it powers up undefined, and
 // when it is 0 attribute fetches still disable CIRAM (ul == 0), so ppu_d is
 // driven from a deselected RAM. Confirm whether "disabled" should fall back
 // to CIRAM attributes.
 module mya_at_mapper(
   system_clk,
   m2,
   cpu_rw, //high=r
   cpu_a,
   cpu_d,
   romsel_n, //traditionally _n signifies active-low
   irq_n,
   exp,
   ppu_wr_n,
   ppu_rd_n,
   ppu_a,
   ppu_d,
   ppu_a13_n,
   ciram_ce_n,
   ciram_a10,
   mya_atram_a,
   mya_atram_d,
   mya_atram_we_n,
   mya_atram_oe_n,
   mya_atram_cs1_n,
   mya_atram_cs2
 );
 //first, cart-edge signals
 //(ports are nets by default, so no separate "wire" declarations are needed)
 input         system_clk;
 input         m2;
 input         cpu_rw;
 input  [14:0] cpu_a;
 inout   [7:0] cpu_d;
 input         romsel_n;
 
 output        irq_n;
 
 inout   [9:0] exp;
 
 input         ppu_wr_n;
 input         ppu_rd_n;
 input  [13:0] ppu_a;
 inout   [7:0] ppu_d;
 input         ppu_a13_n;
 
 output        ciram_ce_n;
 output        ciram_a10;
 //then cart-internals: the ATRAM (probably a 6264)
 output [12:0] mya_atram_a;
 inout   [7:0] mya_atram_d;
 output        mya_atram_we_n;
 output        mya_atram_oe_n;
 output        mya_atram_cs1_n;
 output        mya_atram_cs2;
 reg           mya_atram_cs2; //enable bit, written by the CPU
 //chip-internal:
 reg     [1:0] at_8x;   //[0] = 8px-level X, [1] = 16px-level X of the last NT fetch
 reg     [1:0] at_8y;   //[0] = 8px-level Y, [1] = 16px-level Y of the last NT fetch
 wire          ul;
 wire    [1:0] at_pair; //bit-pair of the ATRAM byte chosen by the 8px X/Y bits
 //reg     [2:0] at_finey;
 
 //This mapper never raises an IRQ and does not use the expansion pins:
 //release them explicitly rather than relying on synthesis defaults.
 assign irq_n = 1'bz;
 assign exp   = 10'bzzzzzzzzzz;
 
 //Nametable mirroring. The original left ciram_a10 undriven (floating).
 //ppu_a[10] gives vertical mirroring; use ppu_a[11] for horizontal mirroring.
 //NOTE(review): the mirroring choice is an assumption - confirm per game.
 assign ciram_a10 = ppu_a[10];
 
 always @(negedge ppu_rd_n)
   //trap nt-fetch: anything in $2000+ that is not attribute space.
   //Attribute space is ppu_a[9:6] == 4'b1111; testing only [9:8] would also
   //skip tile rows 24-29, which are ordinary nametable rows.
   if (ppu_a[13] & (~& ppu_a[9:6])) begin
     at_8x[1:0] <= ppu_a[1:0]; //and store the 8&16px-level X
     at_8y[1:0] <= ppu_a[6:5]; //and Y-coordinates.
   end //trap nt-fetch
 
 //CPU write data and /ROMSEL are only valid towards the end of the M2-high
 //phase, so the register is latched on the falling edge of M2.
 always @(negedge m2)
   if (~cpu_rw & ~romsel_n)
     mya_atram_cs2 <= cpu_d[0];
 //only have one visible register bit, so little decoding necessary: CPU$8xxx.
 
 assign mya_atram_a[7:5] = ppu_a[5:3];
 assign mya_atram_a[3:1] = ppu_a[2:0];
 //ppu_a[12] set = write port ($3xxx): take the 16px bits from the address.
 //ppu_a[12] clear = AT fetch: take them from the preceding NT fetch.
 assign mya_atram_a[0] = (ppu_a[12] ? ppu_a[6]: at_8x[1]);
 assign mya_atram_a[4] = (ppu_a[12] ? ppu_a[7]: at_8y[1]);
 assign mya_atram_a[9:8] = ppu_a[11:10];
 //ppu_a[11:10] = NT-select. also don't need to be routed through CPLD
 assign mya_atram_a[12:10] = 3'b000; //Doing fineY-AT'd need a new write port
 assign ul = 1'b0; //~(at_8x[0] | at_8y[0]);
 //assign ul with 0 to not bother using ciram for any attributes at all
 assign mya_atram_oe_n = ~( ppu_a[13]
   & (& ppu_a[9:6]) //Attribute table: @PPU 14'b1x_xx11_11xx_xxxx
   & ~ul & ~ppu_rd_n);
 assign mya_atram_we_n = ~( ~ppu_wr_n & (&ppu_a[13:12]) & ~ppu_a[8] );
   //Write-port: @PPU 14'b11_NT?0_xxxx_xxxx: ~a[8] to avoid palettespace
 assign mya_atram_cs1_n = 1'b0;
 //assign mya_atram_cs2 = mya_atram_enable;
 assign ciram_ce_n = ~(ppu_a[13]    //nt/at only
   & (~ppu_a[12]) // | (& ppu_a[11:8]))  (palette ram cares not for CIRAM/CE
   & ((& ppu_a[9:6]) ? ul : 1'b1) );//enable for the ul AT- and all NT- fetches.
 
 //ppu_d / mya_atram_d bus logic.
 //Both buses are bidirectional nets, so they are driven with tri-state
 //continuous assignments (a net cannot be assigned inside an always block).
 //Reading by AT: mux out the correct bit-pair and replicate it four times.
 assign at_pair = at_8y[0]
   ? (at_8x[0] ? mya_atram_d[7:6]    //lower-right
               : mya_atram_d[5:4])   //lower-left
   : (at_8x[0] ? mya_atram_d[3:2]    //upper-right
               : mya_atram_d[1:0]);  //upper-left
 assign ppu_d = mya_atram_oe_n ? 8'bzzzzzzzz : {4{at_pair}};
 //Writing through the write port: pass the PPU's data on to the RAM.
 assign mya_atram_d = mya_atram_we_n ? 8'bzzzzzzzz : ppu_d;
 
 endmodule //mya_at_mapper

Less basic

This mode of writing does not work if we want to extend to 8x1 attributes; there are three bits of attribute space to add and we only have three choices (00, 01, 10) of PPUADDR 8-9 for NT3. Even in two-screen mirroring, there is a small problem: but as we are relying on CIRAM for the first sliver of each section, one does not need to have duplicate write-access to those. One could remap $38** to what would have been in $3F**. Four-screen proves more problematic. Also problematic is determining the fine-Y. Brute-force method is to snoop for writes to PPU_SCROLL, as well as reads from PPU_STATUS and writes to PPU_ADDR to know the high-byte latch status. If we don't want to allow raster effects, which seems like a short-sighted decision, perhaps one could somehow divine where to begin from the dummy-fetch prerender scanline. In any case, it would basically require a scanline counter, at which point one would just add a few more bits of state to get a useful scanline-type interrupt, though if it shares the low three bits with the rendering portion it would be more of a NT-relative Y-coordinate interrupt...