User:Myask/MyaGrafx: Difference between revisions
m (minor fix, 8px-addr not 16px) |
m (→Basic Implementation: oops, CPUA15 isn't simply available) |
||
(13 intermediate revisions by the same user not shown) | |||
Line 2: | Line 2: | ||
==Sources== | ==Sources== | ||
[[PPU rendering]], | [[PPU rendering]], | ||
[[Cartridge connector]] | [[Cartridge connector]], | ||
[[6264]] | |||
==Basic== | ==Basic== | ||
As the cart only has CIRAM A10 piped through, one can't just remap part of CIRAM to supply the 256 bytes of attribute one needs for page 0. | As the cart only has CIRAM A10 piped through, one can't just remap part of CIRAM to supply the 256 bytes of attribute one needs for page 0. | ||
So, for the basicmost case... PPU: --10 NT11 11yy yxxx is where attributes normally reside. To our advantage, the nametable byte precedes the attribute table byte. This allows at least the 8x8 tile to be detected: NT-fetch is PPU: --10 NTYY YyyX XXxx. (A0, A5 are what we want to know, as they're not in the AT fetch; A1,6 are the 16-px accounted for by which two bits within an attribute byte.). For this simple one, instead of storing the attribute data per-tile, it's arranged as usual: four tiles in a 32x32 are specified each byte, just that the game pak will return different byte depending on the X and Y tile evenness. | So, for the basicmost case... PPU: --10 NT11 11yy yxxx is where attributes normally reside. To our advantage, the nametable byte precedes the attribute table byte. This allows at least the 8x8 tile to be detected: NT-fetch is PPU: --10 NTYY YyyX XXxx. (A0, A5 are what we want to know, as they're not in the AT fetch; A1,6 are the 16-px accounted for by which two bits within an attribute byte.). For this simple one, instead of storing the attribute data per-tile, it's arranged as usual: four tiles in a 32x32 are specified each byte, just that the game pak will return different byte depending on the X and Y tile evenness. | ||
---- | ==Basic Implementation== | ||
/* NES 8x8(8x1?)-Attribute graphics mapper
   Created by "Myask", April 2015

   8x8-mapper. An external SRAM (probably a 6264) holds one attribute byte
   per 16x16-pixel area; each byte carries four bitpairs, one per 8x8 tile:
       [1:0] upper-left, [3:2] upper-right, [5:4] lower-left, [7:6] lower-right.

   Write port (PPU writes): ppu_a == 14'b11_NTx0_YXyy_yxxx
       ppu_a[8] == 0 to avoid conflict with palettes;
       ppu_a[9] is ignored on write;
       ppu_a[11:10] = nametable select,
       ppu_a[5:3]   = 32px-level Y,   ppu_a[2:0] = 32px-level X
                      (same offset as a normal attribute-table byte),
       ppu_a[7]     = 16px-level Y bit, ppu_a[6] = 16px-level X bit.

   Enable register: any CPU write while romsel_n is low (CPU $8000-$FFFF)
       cpu_d[0] == 1 enables the SRAM (CS2 high), cpu_d[0] == 0 disables it.

   Read port: attribute fetches at ppu_a == 14'b10_NT11_11YY_YXXX.
       The coordinates latched from the preceding nametable fetch select
       which SRAM byte is read and which bitpair of it is returned; the
       bitpair is replicated into all four positions so that the PPU's own
       quadrant selection always picks the per-8x8 value.
*/
module mya_at_mapper(
    system_clk,
    m2,
    cpu_rw,             //high = read
    cpu_a,
    cpu_d,
    romsel_n,           //traditionally _n signifies active-low
    irq_n,
    exp,
    ppu_wr_n,
    ppu_rd_n,
    ppu_a,
    ppu_d,
    ppu_a13_n,
    ciram_ce_n,
    ciram_a10,
    mya_atram_a,
    mya_atram_d,
    mya_atram_we_n,
    mya_atram_oe_n,
    mya_atram_cs1_n,
    mya_atram_cs2
);
    //cart-edge signals (system_clk, cpu_a, exp and ppu_a13_n are unused here)
    input         system_clk;
    input         m2;
    input         cpu_rw;
    input  [14:0] cpu_a;
    inout  [7:0]  cpu_d;
    input         romsel_n;
    output        irq_n;
    inout  [9:0]  exp;
    input         ppu_wr_n;
    input         ppu_rd_n;
    input  [13:0] ppu_a;
    inout  [7:0]  ppu_d;
    input         ppu_a13_n;
    output        ciram_ce_n;
    output        ciram_a10;
    //cart-internal: the attribute SRAM (probably a 6264)
    output [12:0] mya_atram_a;
    inout  [7:0]  mya_atram_d;
    output        mya_atram_we_n;
    output        mya_atram_oe_n;
    output        mya_atram_cs1_n;
    output        mya_atram_cs2;
    reg           mya_atram_cs2;   //the one CPU-visible register bit

    //chip-internal state: tile coordinates of the most recent NT fetch.
    //bit 0 = 8px-level (odd/even tile), bit 1 = 16px-level.
    reg  [1:0] at_8x;
    reg  [1:0] at_8y;
    wire       ul;
    wire       is_at_addr;         //ppu_a[9:0] lies in $3C0-$3FF
    wire [1:0] at_pair;            //bitpair selected for the current tile

    assign is_at_addr = & ppu_a[9:6];

    //Trap nametable fetches: any read in $2000-$3FFF that is not an
    //attribute-table address. All four of ppu_a[9:6] must be tested; looking
    //only at ppu_a[9:8] would also skip tile rows 24-29.
    always @(negedge ppu_rd_n)
        if (ppu_a[13] & ~is_at_addr) begin
            at_8x <= ppu_a[1:0];   //8px- and 16px-level X
            at_8y <= ppu_a[6:5];   //8px- and 16px-level Y
        end

    //Enable register. /ROMSEL is only low while M2 is high, so the write is
    //captured on the falling edge of M2, when romsel_n is still low and the
    //CPU data bus is valid. Only one register bit exists, so no further
    //address decoding is needed: CPU $8000-$FFFF.
    always @(negedge m2)
        if (~cpu_rw & ~romsel_n)
            mya_atram_cs2 <= cpu_d[0];

    //This mapper never raises an interrupt: release the line.
    assign irq_n = 1'bz;

    //NOTE(review): ciram_a10 is not driven anywhere in this module, so
    //nametable mirroring is left undefined - confirm the intended source
    //(e.g. ppu_a[10] or ppu_a[11]) before synthesis.

    //SRAM address. ppu_a[12] distinguishes the write port ($3xxx) from
    //attribute fetches ($2xxx): on writes the 16px-level bits come from
    //ppu_a[7:6], on fetches from the latched nametable coordinates.
    assign mya_atram_a[12:10] = 3'b000;        //Doing fineY-AT'd need a new write port
    assign mya_atram_a[9:8]   = ppu_a[11:10];  //NT-select; needn't be routed through CPLD
    assign mya_atram_a[7:5]   = ppu_a[5:3];
    assign mya_atram_a[4]     = ppu_a[12] ? ppu_a[7] : at_8y[1];
    assign mya_atram_a[3:1]   = ppu_a[2:0];
    assign mya_atram_a[0]     = ppu_a[12] ? ppu_a[6] : at_8x[1];

    //ul = 1 would hand the attribute fetch back to CIRAM; tied to 0 so that
    //CIRAM is not used for any attributes at all. (was: ~(at_8x | at_8y))
    assign ul = 1'b0;

    //SRAM output enable: PPU read of an attribute-table address
    //(@PPU 14'b1x_xx11_11xx_xxxx) that is not delegated to CIRAM.
    //NOTE(review): this is not gated by mya_atram_cs2, so with the SRAM
    //disabled the attribute fetch returns whatever floats on mya_atram_d.
    assign mya_atram_oe_n = ~( ppu_a[13] & is_at_addr & ~ul & ~ppu_rd_n );

    //SRAM write enable: write port @PPU 14'b11_NT?0_xxxx_xxxx;
    //~ppu_a[8] keeps it out of palette space.
    assign mya_atram_we_n = ~( ~ppu_wr_n & (& ppu_a[13:12]) & ~ppu_a[8] );

    assign mya_atram_cs1_n = 1'b0;

    //CIRAM is enabled for every nametable fetch, and for attribute fetches
    //only when ul delegates them. $3xxx is excluded (palette RAM cares not
    //for CIRAM /CE).
    assign ciram_ce_n = ~( ppu_a[13] & ~ppu_a[12]
                         & (is_at_addr ? ul : 1'b1) );

    //Pick the bitpair for this 8x8 tile from the SRAM byte.
    assign at_pair = at_8y[0] ? (at_8x[0] ? mya_atram_d[7:6]    //lower-right
                                          : mya_atram_d[5:4])   //lower-left
                              : (at_8x[0] ? mya_atram_d[3:2]    //upper-right
                                          : mya_atram_d[1:0]);  //upper-left

    //Bidirectional data path. The PPU bus is driven only while the SRAM is
    //being read for an attribute fetch and is released otherwise, so CHR
    //and CIRAM can use it; the SRAM data pins are driven from the PPU bus
    //only during a write-port access.
    assign ppu_d       = ~mya_atram_oe_n ? {4{at_pair}} : 8'bzzzz_zzzz;
    assign mya_atram_d = ~mya_atram_we_n ? ppu_d        : 8'bzzzz_zzzz;
endmodule //mya_at_mapper
==Less basic== | ==Less basic== | ||
This mode of writing does not work if we want to extend to 8x1 attributes; there are three bits of attribute space to add and we only have three choices (00, 01, 10) of PPUADDR 8-9 for NT3. Even in two-screen mirroring, there is a small problem: but as we are relying on CIRAM for the first sliver of each section, one does not need to have duplicate write-access to those. One could remap $38** to what would have been in $3F**. Four-screen proves more problematic. Also problematic is determining the fine-Y. Brute-force method is to snoop for writes to PPU_SCROLL, as well as reads from PPU_STATUS and writes to PPU_ADDR to know the high-byte latch status. If we don't want to allow raster effects, which seems like a short-sighted decision, perhaps one could somehow divine where to begin from the dummy-fetch prerender scanline. In any case, it would basically require a scanline counter, at which point one would just add a few more bits of state to get a useful scanline-type interrupt, though if it shares the low three bits with the rendering portion it would be more of a NT-relative Y-coordinate interrupt... | This mode of writing does not work if we want to extend to 8x1 attributes; there are three bits of attribute space to add and we only have three choices (00, 01, 10) of PPUADDR 8-9 for NT3. Even in two-screen mirroring, there is a small problem: but as we are relying on CIRAM for the first sliver of each section, one does not need to have duplicate write-access to those. One could remap $38** to what would have been in $3F**. Four-screen proves more problematic. Also problematic is determining the fine-Y. Brute-force method is to snoop for writes to PPU_SCROLL, as well as reads from PPU_STATUS and writes to PPU_ADDR to know the high-byte latch status. If we don't want to allow raster effects, which seems like a short-sighted decision, perhaps one could somehow divine where to begin from the dummy-fetch prerender scanline. 
In any case, it would basically require a scanline counter, at which point one would just add a few more bits of state to get a useful scanline-type interrupt, though if it shares the low three bits with the rendering portion it would be more of a NT-relative Y-coordinate interrupt... |
Latest revision as of 09:50, 5 May 2015
"Perhaps someone should mock up a specification for a CPLD that only provides 8x8 attributes and nothing else."--Tepples
Sources
PPU rendering, Cartridge connector, 6264
Basic
As the cart only has CIRAM A10 piped through, one can't just remap part of CIRAM to supply the 256 bytes of attribute one needs for page 0.
So, for the most basic case... PPU: --10 NT11 11yy yxxx is where attributes normally reside. To our advantage, the nametable byte fetch precedes the attribute table byte fetch. This allows at least the 8x8 tile to be detected: the NT fetch is PPU: --10 NTYY YyyX XXxx. (A0 and A5 are what we want to know, as they're not in the AT fetch; A1 and A6 are the 16-px level, already accounted for by which two bits within an attribute byte are used.) For this simple version, instead of storing the attribute data per-tile, it's arranged as usual: four tiles in a 32x32 are specified by each byte; the only difference is that the game pak will return a different byte depending on the X and Y tile evenness.
Basic Implementation
/* NES 8x8(8x1?)-Attribute graphics mapper
   Created by "Myask", April 2015

   8x8-mapper. An external SRAM (probably a 6264) holds one attribute byte
   per 16x16-pixel area; each byte carries four bitpairs, one per 8x8 tile:
       [1:0] upper-left, [3:2] upper-right, [5:4] lower-left, [7:6] lower-right.

   Write port (PPU writes): ppu_a == 14'b11_NTx0_YXyy_yxxx
       ppu_a[8] == 0 to avoid conflict with palettes;
       ppu_a[9] is ignored on write;
       ppu_a[11:10] = nametable select,
       ppu_a[5:3]   = 32px-level Y,   ppu_a[2:0] = 32px-level X
                      (same offset as a normal attribute-table byte),
       ppu_a[7]     = 16px-level Y bit, ppu_a[6] = 16px-level X bit.

   Enable register: any CPU write while romsel_n is low (CPU $8000-$FFFF)
       cpu_d[0] == 1 enables the SRAM (CS2 high), cpu_d[0] == 0 disables it.

   Read port: attribute fetches at ppu_a == 14'b10_NT11_11YY_YXXX.
       The coordinates latched from the preceding nametable fetch select
       which SRAM byte is read and which bitpair of it is returned; the
       bitpair is replicated into all four positions so that the PPU's own
       quadrant selection always picks the per-8x8 value.
*/
module mya_at_mapper(
    system_clk,
    m2,
    cpu_rw,             //high = read
    cpu_a,
    cpu_d,
    romsel_n,           //traditionally _n signifies active-low
    irq_n,
    exp,
    ppu_wr_n,
    ppu_rd_n,
    ppu_a,
    ppu_d,
    ppu_a13_n,
    ciram_ce_n,
    ciram_a10,
    mya_atram_a,
    mya_atram_d,
    mya_atram_we_n,
    mya_atram_oe_n,
    mya_atram_cs1_n,
    mya_atram_cs2
);
    //cart-edge signals (system_clk, cpu_a, exp and ppu_a13_n are unused here)
    input         system_clk;
    input         m2;
    input         cpu_rw;
    input  [14:0] cpu_a;
    inout  [7:0]  cpu_d;
    input         romsel_n;
    output        irq_n;
    inout  [9:0]  exp;
    input         ppu_wr_n;
    input         ppu_rd_n;
    input  [13:0] ppu_a;
    inout  [7:0]  ppu_d;
    input         ppu_a13_n;
    output        ciram_ce_n;
    output        ciram_a10;
    //cart-internal: the attribute SRAM (probably a 6264)
    output [12:0] mya_atram_a;
    inout  [7:0]  mya_atram_d;
    output        mya_atram_we_n;
    output        mya_atram_oe_n;
    output        mya_atram_cs1_n;
    output        mya_atram_cs2;
    reg           mya_atram_cs2;   //the one CPU-visible register bit

    //chip-internal state: tile coordinates of the most recent NT fetch.
    //bit 0 = 8px-level (odd/even tile), bit 1 = 16px-level.
    reg  [1:0] at_8x;
    reg  [1:0] at_8y;
    wire       ul;
    wire       is_at_addr;         //ppu_a[9:0] lies in $3C0-$3FF
    wire [1:0] at_pair;            //bitpair selected for the current tile

    assign is_at_addr = & ppu_a[9:6];

    //Trap nametable fetches: any read in $2000-$3FFF that is not an
    //attribute-table address. All four of ppu_a[9:6] must be tested; looking
    //only at ppu_a[9:8] would also skip tile rows 24-29.
    always @(negedge ppu_rd_n)
        if (ppu_a[13] & ~is_at_addr) begin
            at_8x <= ppu_a[1:0];   //8px- and 16px-level X
            at_8y <= ppu_a[6:5];   //8px- and 16px-level Y
        end

    //Enable register. /ROMSEL is only low while M2 is high, so the write is
    //captured on the falling edge of M2, when romsel_n is still low and the
    //CPU data bus is valid. Only one register bit exists, so no further
    //address decoding is needed: CPU $8000-$FFFF.
    always @(negedge m2)
        if (~cpu_rw & ~romsel_n)
            mya_atram_cs2 <= cpu_d[0];

    //This mapper never raises an interrupt: release the line.
    assign irq_n = 1'bz;

    //NOTE(review): ciram_a10 is not driven anywhere in this module, so
    //nametable mirroring is left undefined - confirm the intended source
    //(e.g. ppu_a[10] or ppu_a[11]) before synthesis.

    //SRAM address. ppu_a[12] distinguishes the write port ($3xxx) from
    //attribute fetches ($2xxx): on writes the 16px-level bits come from
    //ppu_a[7:6], on fetches from the latched nametable coordinates.
    assign mya_atram_a[12:10] = 3'b000;        //Doing fineY-AT'd need a new write port
    assign mya_atram_a[9:8]   = ppu_a[11:10];  //NT-select; needn't be routed through CPLD
    assign mya_atram_a[7:5]   = ppu_a[5:3];
    assign mya_atram_a[4]     = ppu_a[12] ? ppu_a[7] : at_8y[1];
    assign mya_atram_a[3:1]   = ppu_a[2:0];
    assign mya_atram_a[0]     = ppu_a[12] ? ppu_a[6] : at_8x[1];

    //ul = 1 would hand the attribute fetch back to CIRAM; tied to 0 so that
    //CIRAM is not used for any attributes at all. (was: ~(at_8x | at_8y))
    assign ul = 1'b0;

    //SRAM output enable: PPU read of an attribute-table address
    //(@PPU 14'b1x_xx11_11xx_xxxx) that is not delegated to CIRAM.
    //NOTE(review): this is not gated by mya_atram_cs2, so with the SRAM
    //disabled the attribute fetch returns whatever floats on mya_atram_d.
    assign mya_atram_oe_n = ~( ppu_a[13] & is_at_addr & ~ul & ~ppu_rd_n );

    //SRAM write enable: write port @PPU 14'b11_NT?0_xxxx_xxxx;
    //~ppu_a[8] keeps it out of palette space.
    assign mya_atram_we_n = ~( ~ppu_wr_n & (& ppu_a[13:12]) & ~ppu_a[8] );

    assign mya_atram_cs1_n = 1'b0;

    //CIRAM is enabled for every nametable fetch, and for attribute fetches
    //only when ul delegates them. $3xxx is excluded (palette RAM cares not
    //for CIRAM /CE).
    assign ciram_ce_n = ~( ppu_a[13] & ~ppu_a[12]
                         & (is_at_addr ? ul : 1'b1) );

    //Pick the bitpair for this 8x8 tile from the SRAM byte.
    assign at_pair = at_8y[0] ? (at_8x[0] ? mya_atram_d[7:6]    //lower-right
                                          : mya_atram_d[5:4])   //lower-left
                              : (at_8x[0] ? mya_atram_d[3:2]    //upper-right
                                          : mya_atram_d[1:0]);  //upper-left

    //Bidirectional data path. The PPU bus is driven only while the SRAM is
    //being read for an attribute fetch and is released otherwise, so CHR
    //and CIRAM can use it; the SRAM data pins are driven from the PPU bus
    //only during a write-port access.
    assign ppu_d       = ~mya_atram_oe_n ? {4{at_pair}} : 8'bzzzz_zzzz;
    assign mya_atram_d = ~mya_atram_we_n ? ppu_d        : 8'bzzzz_zzzz;
endmodule //mya_at_mapper
Less basic
This mode of writing does not work if we want to extend to 8x1 attributes; there are three bits of attribute space to add and we only have three choices (00, 01, 10) of PPUADDR bits 8-9 for NT3. Even in two-screen mirroring there is a small problem, but as we are relying on CIRAM for the first sliver of each section, one does not need to have duplicate write-access to those. One could remap $38** to what would have been in $3F**. Four-screen proves more problematic. Also problematic is determining the fine-Y. The brute-force method is to snoop for writes to PPU_SCROLL, as well as reads from PPU_STATUS and writes to PPU_ADDR, to know the high-byte latch status. If we don't want to allow raster effects, which seems like a short-sighted decision, perhaps one could somehow divine where to begin from the dummy-fetch prerender scanline. In any case, it would basically require a scanline counter, at which point one would just add a few more bits of state to get a useful scanline-type interrupt, though if it shares the low three bits with the rendering portion it would be more of an NT-relative Y-coordinate interrupt...