/*
 * Print the usage text for the mixer multiplier commands.
 *
 * The command name and sub-commands listed here mirror what ci_service()
 * dispatches on: the token "mix_mult" (alias "m0") followed by set0, set1,
 * val0 or val1. set0/set1 consume one further token, which is converted
 * with atoi() and written to the corresponding mult factor register;
 * val0/val1 only print the current register value.
 */
static void help_output0_mult_factor(void)
{
	puts("mix_mult commands (alias: 'm0')");
	puts("  mix_mult set0 <value>          - set mult factor at mixer input0");
	puts("  mix_mult set1 <value>          - set mult factor at mixer input1");
	puts("  mix_mult val0                  - print mult factor at mixer input0");
	puts("  mix_mult val1                  - print mult factor at mixer input1");
}
hdmi_out0_driver_mult_factor0_write(atoi(get_token(&str))); + printf("HDMI_OUT0 mult factor = %d\n", hdmi_out0_driver_mult_factor0_read()); + printf("HDMI_OUT1 mult factor = %d\n", hdmi_out0_driver_mult_factor1_read()); + } + + else if (strcmp(token, "set1") == 0){ + hdmi_out0_driver_mult_factor1_write(atoi(get_token(&str))); + printf("HDMI_OUT0 mult factor = %d\n", hdmi_out0_driver_mult_factor0_read()); + printf("HDMI_OUT1 mult factor = %d\n", hdmi_out0_driver_mult_factor1_read()); + } + + else if(strcmp(token, "val0") == 0){ + printf("HDMI_OUT0 mult factor = %d\n", hdmi_out0_driver_mult_factor0_read()); + } + + else if(strcmp(token, "val1") == 0){ + printf("HDMI_OUT1 mult factor = %d\n", hdmi_out0_driver_mult_factor1_read()); + } + + else { + help_output0_mult_factor(); + } + + } + else if((strcmp(token, "status") == 0) || (strcmp(token, "s") == 0)) { token = get_token(&str); if(strcmp(token, "on") == 0) @@ -757,4 +809,5 @@ void ci_service(void) } ci_prompt(); -} + +} \ No newline at end of file diff --git a/firmware/lm32/main.c b/firmware/lm32/main.c index 817294c9..000a550b 100644 --- a/firmware/lm32/main.c +++ b/firmware/lm32/main.c @@ -18,6 +18,7 @@ #include "hdmi_out1.h" #include "fx2.h" #include "version.h" +#include "mix.h" int main(void) { @@ -62,6 +63,7 @@ int main(void) #endif ci_prompt(); while(1) { + mult_service(); processor_service(); ci_service(); diff --git a/firmware/lm32/mix.c b/firmware/lm32/mix.c new file mode 100644 index 00000000..f05f031f --- /dev/null +++ b/firmware/lm32/mix.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include + +#include "processor.h" +#include "hdmi_in0.h" +#include "hdmi_in1.h" +#include "pattern.h" +#include "mix.h" + +static const unsigned int mult_bar[20] = { + 0 , + 10854 , + 11878 , + 12493 , + 12902 , + 13312 , + 13517 , + 13722 , + 13926 , + 14131 , + 14336 , + 14438 , + 14541 , + 14643 , + 14746 , + 14848 , + 14950 , + 15053 , + 15155 , + 15258 + + +}; + +#define FILL_RATE 20 // In 
Hertz, double the standard frame rate + +void mult_service(void) +{ + static int last_event; + static int counter; + +// if (mix_status) { + if(elapsed(&last_event, identifier_frequency_read()/FILL_RATE)) { + counter = counter+1; + if(counter >= (FILL_RATE-1)) { + counter = 0; + } + } +// } + + hdmi_out0_driver_mult_factor0_write(mult_bar[counter]); + hdmi_out0_driver_mult_factor1_write(mult_bar[20-1-counter]); + +} diff --git a/firmware/lm32/mix.h b/firmware/lm32/mix.h new file mode 100644 index 00000000..0973ec3d --- /dev/null +++ b/firmware/lm32/mix.h @@ -0,0 +1,12 @@ + +#ifndef __MIX_H +#define __MIX_H + +#include + +static bool mix_status; +static const unsigned int mult_bar[20]; + +void mult_service(void); + +#endif diff --git a/firmware/lm32/processor.c b/firmware/lm32/processor.c index 6a66f42c..edb387fe 100644 --- a/firmware/lm32/processor.c +++ b/firmware/lm32/processor.c @@ -508,15 +508,21 @@ void processor_update(void) #ifdef CSR_HDMI_OUT0_BASE /* hdmi_out0 */ #ifdef CSR_HDMI_IN0_BASE - if(processor_hdmi_out0_source == VIDEO_IN_HDMI_IN0) + if(processor_hdmi_out0_source == VIDEO_IN_HDMI_IN0) { hdmi_out0_fi_base0_write(hdmi_in0_framebuffer_base(hdmi_in0_fb_index)); + hdmi_out0_fi_base1_write(pattern_framebuffer_base()); + } #endif #ifdef CSR_HDMI_IN1_BASE - if(processor_hdmi_out0_source == VIDEO_IN_HDMI_IN1) + if(processor_hdmi_out0_source == VIDEO_IN_HDMI_IN1) { hdmi_out0_fi_base0_write(hdmi_in1_framebuffer_base(hdmi_in1_fb_index)); + hdmi_out0_fi_base1_write(hdmi_in1_framebuffer_base(hdmi_in1_fb_index)); + } #endif - if(processor_hdmi_out0_source == VIDEO_IN_PATTERN) + if(processor_hdmi_out0_source == VIDEO_IN_PATTERN) { hdmi_out0_fi_base0_write(pattern_framebuffer_base()); + hdmi_out0_fi_base1_write(pattern_framebuffer_base()); + } #endif #ifdef CSR_HDMI_OUT1_BASE @@ -529,8 +535,8 @@ void processor_update(void) if(processor_hdmi_out1_source == VIDEO_IN_HDMI_IN1) hdmi_out1_fi_base0_write(hdmi_in1_framebuffer_base(hdmi_in1_fb_index)); #endif - 
if(processor_hdmi_out1_source == VIDEO_IN_PATTERN) - hdmi_out1_fi_base0_write(pattern_framebuffer_base()); +// if(processor_hdmi_out1_source == VIDEO_IN_PATTERN) +// hdmi_out1_fi_base0_write(pattern_framebuffer_base()); #endif #ifdef ENCODER_BASE @@ -545,8 +551,8 @@ void processor_update(void) encoder_reader_base_write((hdmi_in1_framebuffer_base(hdmi_in1_fb_index))); } #endif - if(processor_encoder_source == VIDEO_IN_PATTERN) - encoder_reader_base_write(pattern_framebuffer_base()); +// if(processor_encoder_source == VIDEO_IN_PATTERN) +// encoder_reader_base_write(pattern_framebuffer_base()); #endif } diff --git a/gateware/csc/common.py b/gateware/csc/common.py index 88eb935f..56333b59 100644 --- a/gateware/csc/common.py +++ b/gateware/csc/common.py @@ -16,13 +16,21 @@ def saturate(i, o, minimum, maximum): def coef(value, cw=None): return int(value * 2**cw) if cw is not None else value - def rgb_layout(dw): return [("r", dw), ("g", dw), ("b", dw)] +def rgb16f_layout(dw): + return [("rf", dw), ("gf", dw), ("bf", dw)] def ycbcr444_layout(dw): return [("y", dw), ("cb", dw), ("cr", dw)] def ycbcr422_layout(dw): return [("y", dw), ("cb_cr", dw)] + +def pix_layout(dw): + return [("pix", dw)] + +def pixf_layout(dw): + return [("pixf", dw)] + diff --git a/gateware/csc/rgb16f2rgb.py b/gateware/csc/rgb16f2rgb.py new file mode 100644 index 00000000..87c0d0b0 --- /dev/null +++ b/gateware/csc/rgb16f2rgb.py @@ -0,0 +1,73 @@ +# rgb16f2rgb + +from migen.fhdl.std import * +from migen.genlib.record import * +from migen.flow.actor import * + +from gateware.csc.common import * + +@DecorateModule(InsertCE) +class PIXF2PIXDatapath(Module): + """ + Converts a 16 bit half precision floating point + number defined in the range [0-1] to 8 bit unsigned + int represented by a pixel in the range [0-255] + """ + latency = 2 + def __init__(self, pixf_w, pix_w): + self.sink = sink = Record(pixf_layout(pixf_w)) + self.source = source = Record(pix_layout(pix_w)) + + # # # + + # delay pixf signals + 
# 8 bit pixel value -> 16 bit half precision float bit pattern, indexed by
# the pixel value. Built once at import time instead of on every call.
_PIX_TO_PIXF_LUT = (
    0,     7168,  8192,  8704,  9216,  9472,  9728,  9984,
    10240, 10368, 10496, 10624, 10752, 10880, 11008, 11136,
    11264, 11328, 11392, 11456, 11520, 11584, 11648, 11712,
    11776, 11840, 11904, 11968, 12032, 12096, 12160, 12224,
    12288, 12320, 12352, 12384, 12416, 12448, 12480, 12512,
    12544, 12576, 12608, 12640, 12672, 12704, 12736, 12768,
    12800, 12832, 12864, 12896, 12928, 12960, 12992, 13024,
    13056, 13088, 13120, 13152, 13184, 13216, 13248, 13280,
    13312, 13328, 13344, 13360, 13376, 13392, 13408, 13424,
    13440, 13456, 13472, 13488, 13504, 13520, 13536, 13552,
    13568, 13584, 13600, 13616, 13632, 13648, 13664, 13680,
    13696, 13712, 13728, 13744, 13760, 13776, 13792, 13808,
    13824, 13840, 13856, 13872, 13888, 13904, 13920, 13936,
    13952, 13968, 13984, 14000, 14016, 14032, 14048, 14064,
    14080, 14096, 14112, 14128, 14144, 14160, 14176, 14192,
    14208, 14224, 14240, 14256, 14272, 14288, 14304, 14320,
    14336, 14344, 14352, 14360, 14368, 14376, 14384, 14392,
    14400, 14408, 14416, 14424, 14432, 14440, 14448, 14456,
    14464, 14472, 14480, 14488, 14496, 14504, 14512, 14520,
    14528, 14536, 14544, 14552, 14560, 14568, 14576, 14584,
    14592, 14600, 14608, 14616, 14624, 14632, 14640, 14648,
    14656, 14664, 14672, 14680, 14688, 14696, 14704, 14712,
    14720, 14728, 14736, 14744, 14752, 14760, 14768, 14776,
    14784, 14792, 14800, 14808, 14816, 14824, 14832, 14840,
    14848, 14856, 14864, 14872, 14880, 14888, 14896, 14904,
    14912, 14920, 14928, 14936, 14944, 14952, 14960, 14968,
    14976, 14984, 14992, 15000, 15008, 15016, 15024, 15032,
    15040, 15048, 15056, 15064, 15072, 15080, 15088, 15096,
    15104, 15112, 15120, 15128, 15136, 15144, 15152, 15160,
    15168, 15176, 15184, 15192, 15200, 15208, 15216, 15224,
    15232, 15240, 15248, 15256, 15264, 15272, 15280, 15288,
    15296, 15304, 15312, 15320, 15328, 15336, 15344, 15352,
)


def lookup_table(pix_val):
    '''
    Return the 16 bit half precision float bit pattern for an 8 bit
    pixel value.

    Contents of the table were generated using the int2float function
    from litex.csc.test.common.

    pix_val: integer pixel value in the range [0-255].
    Returns the table entry at index pix_val (an int).
    Raises IndexError when pix_val is outside [0-255]; negative values
    are rejected too instead of silently indexing from the end.
    '''
    if not 0 <= pix_val < len(_PIX_TO_PIXF_LUT):
        raise IndexError("pixel value out of range [0-255]: %r" % (pix_val,))
    return _PIX_TO_PIXF_LUT[pix_val]
@DecorateModule(InsertCE)
class PIX2PIXFDatapath(Module):
    """
    Converts a 8 bit unsigned int represented by a pixel in
    the range [0-255] to a 16 bit half precision floating point
    number defined in the range [0-1], using a leading one detector
    and a shifter instead of a look up table.

    Two register stages (latency = 2):
      stage 1: register the leading-one position and the pixel bits
               aligned to the top of the 10 bit fraction field
      stage 2: shift the fraction, compute the exponent and pack

    The packed value always has sign bit 0, and a pixel value of 0 is
    encoded as 0, matching what lookup_table() returns for the same
    inputs. (The sign bit used to be forced to 1 and pixel 0 was packed
    with exponent 14.)

    The detector and the slices below are written for 8 bit pixels.
    """

    latency = 2

    def __init__(self, pix_w, pixf_w):
        self.sink = sink = Record(pix_layout(pix_w))
        self.source = source = Record(pixf_layout(pixf_w))

        # # #

        # delay pix signal
        # NOTE(review): nothing in this block reads the delayed copies.
        pix_delayed = [sink]
        for i in range(self.latency):
            pix_n = Record(pix_layout(pix_w))
            self.sync += getattr(pix_n, "pix").eq(getattr(pix_delayed[-1], "pix"))
            pix_delayed.append(pix_n)

        # Hardware implementation:

        # Stage 1
        # Leading one detector
        lshift = Signal(4)
        frac_val = Signal(10)
        # LeadOne reports 0 when no bit is set, which is the same value it
        # reports for bit 7, so a zero pixel has to be tracked separately.
        is_zero = Signal()

        self.submodules.l1 = LeadOne()
        self.comb += [
            self.l1.datai.eq(sink.pix)
        ]

        self.sync += [
            lshift.eq(self.l1.leadone),
            frac_val[3:].eq(sink.pix[:7]),
            frac_val[:3].eq(0),
            is_zero.eq(sink.pix == 0)
        ]

        # Stage 2
        # Adjust frac and exp components as per lshift
        # Pack in 16bit float
        self.sync += [
            If(is_zero,
                source.pixf.eq(0)
            ).Else(
                # the leading one itself is shifted out of the 10 bit field
                source.pixf[:10].eq(frac_val << lshift),
                source.pixf[10:15].eq(15 - 1 - lshift),
                source.pixf[15].eq(0)
            )
        ]
sink = Sink(EndpointDescription(rgb_layout(rgb_w), packetized=True)) + self.source = source = Source(EndpointDescription(rgb16f_layout(rgb16f_w), packetized=True)) + + # # # + + for name in ["r", "g", "b"]: + self.submodules.datapath = PIX2PIXFLUT(rgb_w, rgb16f_w) + PipelinedActor.__init__(self, self.datapath.latency) + self.comb += self.datapath.ce.eq(self.pipe_ce) + self.comb += getattr(self.datapath.sink, "pix").eq(getattr(sink, name)) + self.comb += getattr(source, name + "f").eq(getattr(self.datapath.source, "pixf")) + + self.latency = self.datapath.latency diff --git a/gateware/csc/test/Makefile b/gateware/csc/test/Makefile index 69c47170..a64025d9 100644 --- a/gateware/csc/test/Makefile +++ b/gateware/csc/test/Makefile @@ -9,6 +9,12 @@ rgb2ycbcr_tb: ycbcr2rgb_tb: $(CMD) ycbcr2rgb_tb.py +rgb2rgb16f_tb: + $(CMD) rgb2rgb16f_tb.py + +rgb16f2rgb_tb: + $(CMD) rgb16f2rgb_tb.py + ycbcr_resampling_tb: $(CMD) ycbcr_resampling_tb.py diff --git a/gateware/csc/test/common.py b/gateware/csc/test/common.py index ac981b54..09bd627d 100644 --- a/gateware/csc/test/common.py +++ b/gateware/csc/test/common.py @@ -169,6 +169,10 @@ def __init__(self, coefs, filename=None, size=None): self.cb = None self.cr = None + self.r_f = None + self.g_f = None + self.b_f = None + self.data = [] self.coefs = coefs @@ -176,7 +180,7 @@ def __init__(self, coefs, filename=None, size=None): self.length = None if filename is not None: - self.open(filename) + self.open(filename) def open(self, filename): @@ -206,11 +210,15 @@ def set_ycbcr(self, y, cb, cr): self.cr = cr self.length = len(y) + def set_rgb16f(self, r_f, g_f, b_f): + self.r_f = r_f + self.g_f = g_f + self.b_f = b_f + self.length = len(r_f) def set_data(self, data): self.data = data - def pack_rgb(self): self.data = [] for i in range(self.length): @@ -220,7 +228,6 @@ def pack_rgb(self): self.data.append(data) return self.data - def pack_ycbcr(self): self.data = [] for i in range(self.length): @@ -230,6 +237,14 @@ def pack_ycbcr(self): 
self.data.append(data) return self.data + def pack_rgb16f(self): + self.data = [] + for i in range(self.length): + data = (self.r_f[i] & 0xffff) << 32 + data |= (self.g_f[i] & 0xffff) << 16 + data |= (self.b_f[i] & 0xffff) << 0 + self.data.append(data) + return self.data def unpack_rgb(self): self.r = [] @@ -239,6 +254,7 @@ def unpack_rgb(self): self.r.append((data >> 16) & 0xff) self.g.append((data >> 8) & 0xff) self.b.append((data >> 0) & 0xff) + print(self.r[0]) return self.r, self.g, self.b @@ -253,6 +269,16 @@ def unpack_ycbcr(self): return self.y, self.cb, self.cr + def unpack_rgb16f(self): + self.r_f = [] + self.g_f = [] + self.b_f = [] + for data in self.data: + self.r_f.append((data >> 32) & 0xffff) + self.g_f.append((data >> 16) & 0xffff) + self.b_f.append((data >> 0 ) & 0xffff) + return self.r_f, self.g_f, self.b_f + # Model for our implementation def rgb2ycbcr_model(self): self.y = [] @@ -299,6 +325,69 @@ def ycbcr2rgb(self): self.r.append(int(y + (cr - 128) * 1.402)) self.g.append(int(y + (cb - 128) * -0.34414 + (cr - 128) * -0.71414)) self.b.append(int(y + (cb - 128) * 1.772)) - return self.r, self.g, self.b + return self.r, self.g, self.b + # Convert 16 bit float to 8 bit pixel + def rgb16f2rgb_model(self): + self.r = [] + self.g = [] + self.b = [] + for r_f, g_f, b_f in zip(self.r_f, self.g_f, self.b_f): + self.r.append(float2int(r_f)) + self.g.append(float2int(g_f)) + self.b.append(float2int(b_f)) + return self.r, self.g, self.b + # Convert 8 bit pixel to 16 bit float + def rgb2rgb16f_model(self): + self.r_f = [] + self.g_f = [] + self.b_f = [] + for r, g, b in zip(self.r, self.g, self.b): + self.r_f.append(int2float(r)) + self.g_f.append(int2float(g)) + self.b_f.append(int2float(b)) + return self.r_f, self.g_f, self.b_f + +def int2float(x): + ''' + Converts a 8 bit unsigned int to 16 bit half precision floating + point represntation.Expected input is in the range [0-255] + Output is an 16 bit integer whose bit representation correspond + to half 
def float2int(x):
    '''
    Converts a 16 bit half precision floating point representation
    to an 8 bit unsigned int.

    x is an integer holding the float16 bit pattern (1 sign bit,
    5 exponent bits, 10 fraction bits). The sign bit is ignored.
    Input is expected in the range [0-1], output is in the
    corresponding range [0-255].

    Bit patterns with a biased exponent of 15 or more (1.0 and above)
    saturate to 255, the same way PIXF2PIXDatapath in
    gateware/csc/rgb16f2rgb.py does. Previously exponents 15-17 produced
    values above 255 and larger exponents raised ValueError because of a
    negative shift count.
    '''
    if x == 0:
        return 0
    exp = (x >> 10) & 0x1f          # Unpack exp
    frac = 0x400 | (x & 0x3ff)      # Unpack frac, restore the implicit one
    shift = 17 - exp
    if shift < 3:
        # exp >= 15: result would not fit in 8 bits
        return 255
    return frac >> shift
self.rgb16f2rgb.sink.payload.bf.eq(self.streamer.source.data[0:16]), + + Record.connect(self.rgb16f2rgb.source, self.logger.sink, leave_out=["r", "g", "b"]), + self.logger.sink.data[16:24].eq(self.rgb16f2rgb.source.r), + self.logger.sink.data[8:16].eq(self.rgb16f2rgb.source.g), + self.logger.sink.data[0:8].eq(self.rgb16f2rgb.source.b) + ] + + def gen_simulation(self, selfp): +# convert image using rgb16f2rgb model + raw_image = RAWImage(None, "lena.png", 64) + raw_image.rgb2rgb16f_model() + raw_image.rgb16f2rgb_model() + raw_image.save("lena_rgb16f2rgb_reference.png") + + for i in range(24): + yield + + # convert image using rgb16f2rgb implementation + raw_image = RAWImage(None, "lena.png", 64) + raw_image.rgb2rgb16f_model() + raw_image.pack_rgb16f() + packet = Packet(raw_image.data) + self.streamer.send(packet) + yield from self.logger.receive() + raw_image.set_data(self.logger.packet) + raw_image.unpack_rgb() + raw_image.save("lena_rgb16f2rgb.png") + + +if __name__ == "__main__": + run_simulation(TB(), ncycles=8192, vcd_name="my.vcd", keep_files=True) diff --git a/gateware/csc/test/rgb2rgb16f_tb.py b/gateware/csc/test/rgb2rgb16f_tb.py new file mode 100644 index 00000000..a106f628 --- /dev/null +++ b/gateware/csc/test/rgb2rgb16f_tb.py @@ -0,0 +1,52 @@ +from migen.fhdl.std import * +from migen.sim.generic import run_simulation +from migen.flow.actor import EndpointDescription + +from gateware.csc.common import * +from gateware.csc.rgb2rgb16f import RGB2RGB16f + +from gateware.csc.test.common import * + + +class TB(Module): + def __init__(self): + self.submodules.streamer = PacketStreamer(EndpointDescription([("data", 24)], packetized=True)) + self.submodules.rgb2rgb16f = RGB2RGB16f() + self.submodules.logger = PacketLogger(EndpointDescription([("data", 48)], packetized=True)) + + self.comb += [ + Record.connect(self.streamer.source, self.rgb2rgb16f.sink, leave_out=["data"]), + self.rgb2rgb16f.sink.payload.r.eq(self.streamer.source.data[16:24]), + 
self.rgb2rgb16f.sink.payload.g.eq(self.streamer.source.data[8:16]), + self.rgb2rgb16f.sink.payload.b.eq(self.streamer.source.data[0:8]), + + Record.connect(self.rgb2rgb16f.source, self.logger.sink, leave_out=["rf", "gf", "bf"]), + self.logger.sink.data[32:48].eq(self.rgb2rgb16f.source.rf), + self.logger.sink.data[16:32].eq(self.rgb2rgb16f.source.gf), + self.logger.sink.data[ 0:16].eq(self.rgb2rgb16f.source.bf) + ] + + + def gen_simulation(self, selfp): + # convert image using rgb2ycbcr model + raw_image = RAWImage(None, "lena.png", 64) + raw_image.rgb2rgb16f_model() + raw_image.rgb16f2rgb_model() + raw_image.save("lena_rgb2rgb16f_reference.png") + + for i in range(24): + yield + + # convert image using rgb2ycbcr implementation + raw_image = RAWImage(None, "lena.png", 64) + raw_image.pack_rgb() + packet = Packet(raw_image.data) + self.streamer.send(packet) + yield from self.logger.receive() + raw_image.set_data(self.logger.packet) + raw_image.unpack_rgb16f() + raw_image.rgb16f2rgb_model() + raw_image.save("lena_rgb2rgb16f.png") + +if __name__ == "__main__": + run_simulation(TB(), ncycles=8192, vcd_name="my.vcd", keep_files=True) diff --git a/gateware/float_arithmetic/__init__.py b/gateware/float_arithmetic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/gateware/float_arithmetic/common.py b/gateware/float_arithmetic/common.py new file mode 100644 index 00000000..c52bb42a --- /dev/null +++ b/gateware/float_arithmetic/common.py @@ -0,0 +1,17 @@ +from migen.fhdl.std import * + +def in_layout(dw): + return [("in1", dw), ("in2", dw)] + +def out_layout(dw): + return [("out", dw)] + +def rgb16f_layout(dw): + return [("rf", dw), ("gf", dw), ("bf", dw)] + +def rgb_layout(dw): + return [("r", dw), ("g", dw), ("b", dw)] + +def floatin_layout(dw): + return [("r1", dw), ("g1", dw), ("b1", dw), ("r2", dw), ("g2", dw), ("b2", dw)] + diff --git a/gateware/float_arithmetic/floatadd.py b/gateware/float_arithmetic/floatadd.py new file mode 100644 index 
@DecorateModule(InsertCE)
class FloatAddDatapath(Module):
    """
    Adds two float16 numbers in1 and in2 and returns the sum out in the
    same float16 format, as a five stage pipeline (latency = 5):

      stage 1: unpack both inputs and subtract the exponents
      stage 2: shift the mantissa of the smaller input to the common exponent
      stage 3: add the aligned mantissas
      stage 4: find the leading one, shift and adjust the exponent
      stage 5: pack fraction, exponent and sign

    An input whose exponent field is 0 is unpacked without the implicit
    leading one and with its exponent raised to 1.

    The input sign bits are registered but are not used to form the
    result: the mantissas are always added.

    Fixed in this version: in2_sign is taken from in2 (it was read from
    in1), and the exponent difference uses the adjusted exponent of both
    inputs (it mixed the adjusted in1 exponent with the raw in2 exponent).
    """

    latency = 5

    def __init__(self, dw):
        self.sink = sink = Record(in_layout(dw))
        self.source = source = Record(out_layout(dw))

        # delay input signals
        # NOTE(review): nothing in this block reads the delayed copies.
        in_delayed = [sink]
        for i in range(self.latency):
            in_n = Record(in_layout(dw))
            for name in ["in1", "in2"]:
                self.sync += getattr(in_n, name).eq(getattr(in_delayed[-1], name))
            in_delayed.append(in_n)

        # Hardware implementation:

        # Stage 1
        # Unpack
        # Subtract Exponents

        in1_frac = Signal(10)
        in2_frac = Signal(10)

        in1_mant = Signal(11)
        in2_mant = Signal(11)

        in1_exp = Signal(5)
        in2_exp = Signal(5)

        in1_minus_in2_exp = Signal((6, True))

        in1_exp1 = Signal(5)
        in2_exp1 = Signal(5)

        in1_sign = Signal()
        in2_sign = Signal()

        out_status1 = Signal(2)
        # 00-0 Zero
        # 01-1 Inf
        # 10-2 Nan
        # 11-3 Normal

        self.comb += [
            in1_frac.eq(sink.in1[:10]),
            in2_frac.eq(sink.in2[:10]),

            in1_exp.eq(sink.in1[10:15]),
            in2_exp.eq(sink.in2[10:15]),

            in1_sign.eq(sink.in1[15]),
            in2_sign.eq(sink.in2[15]),
        ]

        self.comb += [
            If(in1_exp == 0,
                in1_mant.eq(Cat(sink.in1[:10], 0)),
                in1_exp1.eq(sink.in1[10:15] + 1)
            ).Else(
                in1_mant.eq(Cat(sink.in1[:10], 1)),
                in1_exp1.eq(sink.in1[10:15])
            ),

            If(in2_exp == 0,
                in2_mant.eq(Cat(sink.in2[:10], 0)),
                in2_exp1.eq(sink.in2[10:15] + 1)
            ).Else(
                in2_mant.eq(Cat(sink.in2[:10], 1)),
                in2_exp1.eq(sink.in2[10:15])
            )
        ]

        in1_frac_stage1 = Signal(11)
        in2_frac_stage1 = Signal(11)
        in1_sign_stage1 = Signal()
        in2_sign_stage1 = Signal()
        in1_exp_stage1 = Signal(5)
        in2_exp_stage1 = Signal(5)

        self.sync += [
            # both operands use the adjusted exponent, the same values
            # that stage 2 selects the result exponent from
            in1_minus_in2_exp.eq(in1_exp1 - in2_exp1),
            in1_frac_stage1.eq(in1_mant),
            in2_frac_stage1.eq(in2_mant),
            in1_exp_stage1.eq(in1_exp1),
            in2_exp_stage1.eq(in2_exp1),
            in1_sign_stage1.eq(in1_sign),
            in2_sign_stage1.eq(in2_sign),
            out_status1.eq(3),
        ]

        # Stage 2
        # Adjust both the input fracs to common exponent
        in1_frac_stage2 = Signal(11)
        in2_frac_stage2 = Signal(11)
        in1_sign_stage2 = Signal()
        in2_sign_stage2 = Signal()
        # despite the name this holds the exponent of the larger input
        in1_minus_in2_exp_stage2 = Signal(5)
        out_2 = Signal(16)

        self.sync += [
            # bit 5 is the sign of the exponent difference
            If(~in1_minus_in2_exp[5], [
                    in2_frac_stage2.eq(in2_frac_stage1 >> in1_minus_in2_exp),
                    in1_frac_stage2.eq(in1_frac_stage1),
                    in1_minus_in2_exp_stage2.eq(in1_exp_stage1)
                ]
            ).Else([
                    in1_frac_stage2.eq(in1_frac_stage1 >> (-1)*in1_minus_in2_exp),
                    in1_minus_in2_exp_stage2.eq(in2_exp_stage1),
                    in2_frac_stage2.eq(in2_frac_stage1),
                ]
            ),
            in1_sign_stage2.eq(in1_sign_stage1),
            in2_sign_stage2.eq(in2_sign_stage1),
        ]

        # Stage 3
        # Adder Unit
        in1_plus_in2_frac = Signal(12)
        in1_plus_in2_sign = Signal(1)
        in1_minus_in2_exp_stage3 = Signal(5)
        out_3 = Signal(16)

        self.sync += [
            Cat(in1_plus_in2_frac, in1_plus_in2_sign).eq(in1_frac_stage2 + in2_frac_stage2),
            in1_minus_in2_exp_stage3.eq(in1_minus_in2_exp_stage2),
            out_3.eq(out_2)
        ]

        # Stage 4
        # Shift and Adjust
        leadone = Signal(4)
        self.submodules.l1 = LeadOne()
        self.comb += [
            self.l1.datai.eq(in1_plus_in2_frac),
            leadone.eq(self.l1.leadone)
        ]
        out_sign_stage4 = Signal(1)
        out_frac_stage4 = Signal(12)
        out_exp_stage4 = Signal(5)
        out_4 = Signal(16)
        self.sync += [
            out_frac_stage4.eq(in1_plus_in2_frac << (leadone)),
            out_exp_stage4.eq(in1_minus_in2_exp_stage3 - leadone + 1),
            out_sign_stage4.eq(in1_plus_in2_sign),
            out_4.eq(out_frac_stage4)
        ]

        # stage 5
        # Normalize and pack
        # bit 11 of the shifted sum is the leading one and is dropped
        self.sync += [
            source.out.eq(Cat(out_frac_stage4[1:11], out_exp_stage4, out_sign_stage4))
        ]
self.latency = self.datapath.latency + diff --git a/gateware/float_arithmetic/floatmult.py b/gateware/float_arithmetic/floatmult.py new file mode 100644 index 00000000..ddb351fb --- /dev/null +++ b/gateware/float_arithmetic/floatmult.py @@ -0,0 +1,200 @@ +''' +FloatMultDatapath class: Multiply two floating point numbers a and b, returns +their output c in the same float16 format. + +FloatMult class: Use the FloatMultDatapath above and generates a modules +implemented using five stage pipeline. +''' + +from migen.fhdl.std import * +from migen.genlib.record import * +from migen.bank.description import * +from migen.flow.actor import * + +from gateware.float_arithmetic.common import * + +class LeadOne(Module): + """ + This return the position of leading one of the Signal Object datai, as the + leadone Signal object. Function input dw defines the data width of datai + Signal object. + """ + def __init__(self,dw): + self.datai = Signal(dw) + self.leadone = Signal(max=dw) + for j in range(dw): + self.comb += If(self.datai[j], self.leadone.eq(dw - j-1)) + +@DecorateModule(InsertCE) +class FloatMultDatapath(Module): + """ + This adds a floating point multiplication unit. + Inputs: in1 and in2 + Output: out + Implemented as a 5 stage pipeline, design is based on float16 design doc. 
+ Google Docs Link: https://goo.gl/Rvx2B7 + """ + latency = 5 + def __init__(self, dw): + self.sink = sink = Record(in_layout(dw)) + self.source = source = Record(out_layout(dw)) + + # delay rgb signals + in_delayed = [sink] + for i in range(self.latency): + in_n = Record(in_layout(dw)) + for name in ["in1", "in2"]: + self.sync += getattr(in_n, name).eq(getattr(in_delayed[-1], name)) + in_delayed.append(in_n) + + # stage 1 + # Unpack + # Look for special cases + + in1_mant = Signal(11) + in2_mant = Signal(11) + + in1_exp1 = Signal(5) + in2_exp1 = Signal(5) + +# in1_sign = Signal() +# in2_sign = Signal() + + out_status1 = Signal(2) + status_stage1 = Signal(16) + # 00-0 Zero + # 01-1 Inf + # 10-2 Nan + # 11-3 Normal + + self.sync += [ + If(sink.in1[10:15]==0, + in1_mant.eq( Cat(sink.in1[:10], 0)), + in1_exp1.eq(sink.in1[10:15] + 1 ) + ).Else( + in1_mant.eq( Cat(sink.in1[:10], 1)), + in1_exp1.eq(sink.in1[10:15]) + ), + + If(sink.in2[10:15]==0, + in2_mant.eq( Cat(sink.in2[:10], 0)), + in2_exp1.eq(sink.in2[10:15] + 1 ) + ).Else( + in2_mant.eq( Cat(sink.in2[:10], 1)), + in2_exp1.eq(sink.in2[10:15]) + ), + out_status1.eq(3), + status_stage1.eq(sink.in2[10:15]) + ] + + # stage 2 + # Multiply fractions and add exponents + out_mult = Signal(22) + out_exp = Signal((7,True)) + out_status2 = Signal(2) + status_stage2 = Signal(16) + + self.sync += [ + out_mult.eq(in1_mant * in2_mant), + out_exp.eq(in1_exp1 + in2_exp1 - 15), + out_status2.eq(out_status1), + status_stage2.eq(status_stage1) + ] + + # stage 3 + # Leading one detector + one_ptr = Signal(5) + out_status3 = Signal(2) + out_mult3 = Signal(22) + out_exp3 = Signal((7,True)) + status_stage3 = Signal(16) + + lead_one_ptr = Signal(5) + self.submodules.leadone = LeadOne(22) + self.comb += [ + self.leadone.datai.eq(out_mult), + lead_one_ptr.eq(self.leadone.leadone) + ] + + self.sync += [ + out_status3.eq(out_status2), + out_mult3.eq(out_mult), + out_exp3.eq(out_exp), + one_ptr.eq(lead_one_ptr), + 
status_stage3.eq(status_stage2) + ] + + # stage 4 + # Shift and Adjust + out_exp_adjust = Signal((7,True)) + out_mult_shift = Signal(22) + out_status4 = Signal(2) + status_stage4 = Signal(16) + + self.sync += [ + out_status4.eq(3), + If((out_exp3 - one_ptr) < 1, + out_exp_adjust.eq(0), + out_mult_shift.eq(((out_mult3 >> (0-out_exp3)) << 1)) + ).Else( + out_exp_adjust.eq(out_exp3 +1 - one_ptr), + out_mult_shift.eq(out_mult3 << one_ptr+1) + ), + + ] + + # stage 5 + # Normalize and pack + self.sync += [ + If(out_status4 == 3, + source.out.eq( Cat(out_mult_shift[12:], out_exp_adjust[:5],0) ) + ), +# source.out.eq(status_stage4) + ] + + + +class FloatMult(PipelinedActor, Module, AutoCSR): + def __init__(self, dw=16): + self.sink = sink = Sink(EndpointDescription(in_layout(dw), packetized=True)) + self.source = source = Source(EndpointDescription(out_layout(dw), packetized=True)) + + # # # + + self.submodules.datapath = FloatMultDatapath(dw) + PipelinedActor.__init__(self, self.datapath.latency) + self.comb += self.datapath.ce.eq(self.pipe_ce) + for name in ["in1", "in2"]: + self.comb += getattr(self.datapath.sink, name).eq(getattr(sink, name)) + self.comb += getattr(source, "out").eq(getattr(self.datapath.source, "out")) + self.latency = self.datapath.latency + +# self._float_in1 = CSRStorage(dw) +# self._float_in2 = CSRStorage(dw) +# self._float_out = CSRStatus(dw) + +# self.comb += [ +# getattr(sink, "in1").eq(self._float_in1.storage), +# getattr(sink, "in2").eq(self._float_in2.storage), +# self._float_out.status.eq(getattr(source, "out")) +# ] + + +class FloatMultRGB(PipelinedActor, Module, AutoCSR): + def __init__(self, dw=16): + self.sink = sink = Sink(EndpointDescription(floatin_layout(dw), packetized=True)) + self.source = source = Source(EndpointDescription(rgb16f_layout(dw), packetized=True)) + + # # # + + for name in ["r", "g", "b"]: + self.submodules.datapath = FloatMultDatapath(dw) + PipelinedActor.__init__(self, self.datapath.latency) + self.comb += 
self.datapath.ce.eq(self.pipe_ce) + self.comb += self.datapath.sink.in1.eq(getattr(sink, name + "1")) +# self.comb += self.datapath.sink.in2.eq(0) + self.comb += self.datapath.sink.in2.eq(getattr(sink, name + "2")) + self.comb += getattr(source, name + "f").eq(self.datapath.source.out) + + self.latency = self.datapath.latency + diff --git a/gateware/float_arithmetic/test/Makefile b/gateware/float_arithmetic/test/Makefile new file mode 100644 index 00000000..b5fed702 --- /dev/null +++ b/gateware/float_arithmetic/test/Makefile @@ -0,0 +1,15 @@ +HDLDIR = ../../../ +PYTHON = python3 + +CMD = PYTHONPATH=$(HDLDIR) $(PYTHON) + +mult_tb: + $(CMD) floatmult_tb.py + +add_tb: + $(CMD) floatadd_tb.py + +clean: + rm -rf *_*.png *.vvp *.v *.vcd + +.PHONY: clean \ No newline at end of file diff --git a/gateware/float_arithmetic/test/common.py b/gateware/float_arithmetic/test/common.py new file mode 100644 index 00000000..a65556d0 --- /dev/null +++ b/gateware/float_arithmetic/test/common.py @@ -0,0 +1,236 @@ +from PIL import Image + +import random +import copy +import numpy as np + + +from migen.fhdl.std import * +from migen.flow.actor import Sink, Source +from migen.genlib.record import * + + +def seed_to_data(seed, random=True): + if random: + return (seed * 0x31415979 + 1) & 0xffffffff + else: + return seed + + +def comp(p1, p2): + r = True + for x, y in zip(p1, p2): + if x != y: + r = False + return r + + +def check(p1, p2): + p1 = copy.deepcopy(p1) + p2 = copy.deepcopy(p2) + if isinstance(p1, int): + return 0, 1, int(p1 != p2) + else: + if len(p1) >= len(p2): + ref, res = p1, p2 + else: + ref, res = p2, p1 + shift = 0 + while((ref[0] != res[0]) and (len(res) > 1)): + res.pop(0) + shift += 1 + length = min(len(ref), len(res)) + errors = 0 + for i in range(length): + if ref.pop(0) != res.pop(0): + errors += 1 + return shift, length, errors + + +def randn(max_n): + return random.randint(0, max_n-1) + + +class Packet(list): + def __init__(self, init=[]): + self.ongoing = False + 
self.done = False + for data in init: + self.append(data) + + +class PacketStreamer(Module): + def __init__(self, description, last_be=None): + self.source = Source(description) + self.last_be = last_be + + # # # + + self.packets = [] + self.packet = Packet() + self.packet.done = True + + def send(self, packet): + packet = copy.deepcopy(packet) + self.packets.append(packet) + return packet + + def send_blocking(self, packet): + packet = self.send(packet) + while not packet.done: + yield + + def do_simulation(self, selfp): + if len(self.packets) and self.packet.done: + self.packet = self.packets.pop(0) + if not self.packet.ongoing and not self.packet.done: + selfp.source.stb = 1 + if self.source.description.packetized: + selfp.source.sop = 1 + selfp.source.data = self.packet.pop(0) + self.packet.ongoing = True + elif selfp.source.stb == 1 and selfp.source.ack == 1: + if self.source.description.packetized: + selfp.source.sop = 0 + if len(self.packet) == 1: + selfp.source.eop = 1 + if self.last_be is not None: + selfp.source.last_be = self.last_be + else: + selfp.source.eop = 0 + if self.last_be is not None: + selfp.source.last_be = 0 + if len(self.packet) > 0: + selfp.source.stb = 1 + selfp.source.data = self.packet.pop(0) + else: + self.packet.done = True + selfp.source.stb = 0 + + +class PacketLogger(Module): + def __init__(self, description): + self.sink = Sink(description) + + # # # + + self.packet = Packet() + + def receive(self): + self.packet.done = False + while not self.packet.done: + yield + + def do_simulation(self, selfp): + selfp.sink.ack = 1 + if selfp.sink.stb: + if self.sink.description.packetized: + if selfp.sink.sop: + self.packet = Packet() + self.packet.append(selfp.sink.data) + else: + self.packet.append(selfp.sink.data) + if selfp.sink.eop: + self.packet.done = True + else: + self.packet.append(selfp.sink.data) + + +class AckRandomizer(Module): + def __init__(self, description, level=0): + self.level = level + + self.sink = Sink(description) + 
self.source = Source(description) + + self.run = Signal() + + self.comb += \ + If(self.run, + Record.connect(self.sink, self.source) + ).Else( + self.source.stb.eq(0), + self.sink.ack.eq(0), + ) + + def do_simulation(self, selfp): + n = randn(100) + if n < self.level: + selfp.run = 0 + else: + selfp.run = 1 + + +class RAWImage: + def __init__(self, coefs, filename=None, size=None): + self.a = None + self.b = None + self.c = None + + self.data = [] + + self.length = None + + self.open() + + + def open(self): + + a = [15360]*256 +# a = [float2binint(float(1)/256) , float2binint(float(3)/256) , float2binint(float(7)/256)] +# b = [float2binint(0.0)]*256 + b = [float2binint(float(x)/256) for x in range(256)] + + self.set_mult_in(a, b) + + def set_mult_in(self, a, b): + self.a = a + self.b = b + self.length = len(a) + + def set_data(self, data): + self.data = data + + def pack_mult_in(self): + self.data = [] + for i in range(self.length): + data = (self.a[i] & 0xffff) << 16 + data |= (self.b[i] & 0xffff) << 0 + self.data.append(data) + q = bin(data)[2:].zfill(32) + return self.data + + def unpack_mult_in(self): + self.c = [] + for data in self.data: + self.c.append((data >> 0) & 0xffff) + print("Output starts here") + for i in range(len(self.c)): + print(bin(self.c[i])[2:6].zfill(5) ) +# print(binint2float(self.c[i])) + + return self.c + + +def float2binint(f): + x = int(np.float16(f).view('H')) + return x + + +def binint2float(x): + xs = bin(x)[2:].zfill(16) + frac = '1'+xs[6:16] + fracn = int(frac,2) + exp = xs[1:6] + expn = int(exp,2) -15 + + if expn == -15 : #subnormal numbers + expn = -14 + frac = '0'+xs[6:16] + fracn = int(frac,2) + + sign = xs[0] + signv = int(sign,2) + + y = ((-1)**signv)*(2**(expn))*fracn*(2**(-10)) + return y diff --git a/gateware/float_arithmetic/test/float_conv.py b/gateware/float_arithmetic/test/float_conv.py new file mode 100644 index 00000000..8c05d679 --- /dev/null +++ b/gateware/float_arithmetic/test/float_conv.py @@ -0,0 +1,24 @@ 
+import numpy as np + +def float2binint(f): + x = int(bin(np.float16(f).view('H'))[2:].zfill(16),2) + return x + + +def binint2float(x): + xs = bin(x)[2:].zfill(16) + frac = '1'+xs[6:16] + fracn = int(frac,2) + exp = xs[1:6] + expn = int(exp,2) -15 + + if expn == -15 : + expn = -14 + frac = '0'+xs[6:16] + fracn = int(frac,2) + + sign = xs[0] + signv = int(sign,2) + + y = ((-1)**signv)*(2**(expn))*fracn*(2**(-10)) + return y diff --git a/gateware/float_arithmetic/test/floatadd_tb.py b/gateware/float_arithmetic/test/floatadd_tb.py new file mode 100644 index 00000000..291c6154 --- /dev/null +++ b/gateware/float_arithmetic/test/floatadd_tb.py @@ -0,0 +1,42 @@ +from migen.fhdl.std import * +from migen.sim.generic import run_simulation +from migen.flow.actor import EndpointDescription + +from gateware.float_arithmetic.common import * +from gateware.float_arithmetic.floatadd import FloatAdd + +from gateware.float_arithmetic.test.common import * + + +class TB(Module): + def __init__(self): + self.submodules.streamer = PacketStreamer(EndpointDescription([("data", 32)], packetized=True)) + self.submodules.floatadd = FloatAdd() + self.submodules.logger = PacketLogger(EndpointDescription([("data", 16)], packetized=True)) + + self.comb += [ + Record.connect(self.streamer.source, self.floatadd.sink, leave_out=["data"]), + self.floatadd.sink.payload.in1.eq(self.streamer.source.data[16:32]), + self.floatadd.sink.payload.in2.eq(self.streamer.source.data[0:16]), + + Record.connect(self.floatadd.source, self.logger.sink, leave_out=["out"]), + self.logger.sink.data[0:16].eq(self.floatadd.source.out) + ] + + def gen_simulation(self, selfp): + + for i in range(16): + yield + + # stream the packed operand pairs through floatadd and log the results + raw_image = RAWImage(None, None, 64) + raw_image.pack_mult_in() + packet = Packet(raw_image.data) + self.streamer.send(packet) + yield from self.logger.receive() + raw_image.set_data(self.logger.packet) + raw_image.unpack_mult_in() +#
raw_image.save("lena_rgb2ycbcr.png") + +if __name__ == "__main__": + run_simulation(TB(), ncycles=8192, vcd_name="my.vcd", keep_files=True) diff --git a/gateware/float_arithmetic/test/floatmult_tb.py b/gateware/float_arithmetic/test/floatmult_tb.py new file mode 100644 index 00000000..fe2d7485 --- /dev/null +++ b/gateware/float_arithmetic/test/floatmult_tb.py @@ -0,0 +1,43 @@ +from migen.fhdl.std import * +from migen.sim.generic import run_simulation +from migen.flow.actor import EndpointDescription + +from gateware.float_arithmetic.common import * +from gateware.float_arithmetic.floatmult import FloatMult + +from gateware.float_arithmetic.test.common import * + + +class TB(Module): + def __init__(self): + self.submodules.streamer = PacketStreamer(EndpointDescription([("data", 32)], packetized=True)) + self.submodules.floatmult = FloatMult() + self.submodules.logger = PacketLogger(EndpointDescription([("data", 16)], packetized=True)) + + self.comb += [ + Record.connect(self.streamer.source, self.floatmult.sink, leave_out=["data"]), + self.floatmult.sink.payload.in1.eq(self.streamer.source.data[16:32]), + self.floatmult.sink.payload.in2.eq(self.streamer.source.data[0:16]), + + Record.connect(self.floatmult.source, self.logger.sink, leave_out=["out"]), + self.logger.sink.data[0:16].eq(self.floatmult.source.out) + ] + + + def gen_simulation(self, selfp): + + for i in range(16): + yield + + # stream the packed operand pairs through floatmult and log the results + raw_image = RAWImage(None, None, 64) + raw_image.pack_mult_in() + packet = Packet(raw_image.data) + self.streamer.send(packet) + yield from self.logger.receive() + raw_image.set_data(self.logger.packet) + raw_image.unpack_mult_in() +# raw_image.save("lena_rgb2ycbcr.png") + +if __name__ == "__main__": + run_simulation(TB(), ncycles=8192, vcd_name="my.vcd", keep_files=True) diff --git a/gateware/float_arithmetic/test/lena.png b/gateware/float_arithmetic/test/lena.png new file mode 100644 index 00000000..37b0d61a Binary files /dev/null
and b/gateware/float_arithmetic/test/lena.png differ diff --git a/gateware/hdmi_in/analysis.py b/gateware/hdmi_in/analysis.py index 27236223..966ae3d3 100644 --- a/gateware/hdmi_in/analysis.py +++ b/gateware/hdmi_in/analysis.py @@ -9,6 +9,10 @@ from gateware.csc.rgb2ycbcr import RGB2YCbCr from gateware.csc.ycbcr444to422 import YCbCr444to422 +from gateware.csc.rgb2rgb16f import RGB2RGB16f +from gateware.csc.rgb16f2rgb import RGB16f2RGB +from gateware.float_arithmetic.floatmult import FloatMultRGB +from gateware.float_arithmetic.floatadd import FloatAddRGB class SyncPolarity(Module): def __init__(self): @@ -127,6 +131,7 @@ def __init__(self, word_width, fifo_depth): self._overflow = CSR() + ### ### de_r = Signal() diff --git a/gateware/hdmi_out/__init__.py b/gateware/hdmi_out/__init__.py index 6dcb7bd2..a26691cf 100644 --- a/gateware/hdmi_out/__init__.py +++ b/gateware/hdmi_out/__init__.py @@ -5,33 +5,51 @@ from migen.actorlib import structuring, misc from misoclib.mem.sdram.frontend import dma_lasmi -from gateware.hdmi_out.format import bpp, pixel_layout, FrameInitiator, VTG +from gateware.hdmi_out.format import bpp, pixel_layout, FrameInitiator, VTG, ConCat from gateware.hdmi_out.phy import Driver from gateware.i2c import I2C class HDMIOut(Module, AutoCSR): - def __init__(self, pads, lasmim, external_clocking=None): + def __init__(self, pads0, lasmim, lasmim1, external_clocking=None): pack_factor = lasmim.dw//bpp - if hasattr(pads, "scl"): - self.submodules.i2c = I2C(pads) + if hasattr(pads0, "scl"): + self.submodules.hdmi_out0_i2c = I2C(pads0) +# if hasattr(pads1, "scl"): +# self.submodules.hdmi_out1_i2c = I2C(pads1) g = DataFlowGraph() - self.fi = FrameInitiator(lasmim.aw, pack_factor) +# self.fi = FrameInitiator(lasmim.aw, pack_factor) + self.fi = FrameInitiator(lasmim.aw, pack_factor, ndmas=2) - intseq = misc.IntSequence(lasmim.aw, lasmim.aw) - dma_out = AbstractActor(plumbing.Buffer) - g.add_connection(self.fi, intseq, source_subr=self.fi.dma_subr()) - 
g.add_pipeline(intseq, AbstractActor(plumbing.Buffer), dma_lasmi.Reader(lasmim), dma_out) + intseq0 = misc.IntSequence(lasmim.aw, lasmim.aw) + intseq1 = misc.IntSequence(lasmim.aw, lasmim.aw) + + dma_out0 = AbstractActor(plumbing.Buffer) + dma_out1 = AbstractActor(plumbing.Buffer) + + g.add_connection(self.fi, intseq0, source_subr=self.fi.dma_subr(0)) + g.add_connection(self.fi, intseq1, source_subr=self.fi.dma_subr(1)) + + g.add_pipeline(intseq0, AbstractActor(plumbing.Buffer), dma_lasmi.Reader(lasmim ), dma_out0) + g.add_pipeline(intseq1, AbstractActor(plumbing.Buffer), dma_lasmi.Reader(lasmim1), dma_out1) + + cast0 = structuring.Cast(lasmim.dw, pixel_layout(pack_factor), reverse_to=True) + cast1 = structuring.Cast(lasmim.dw, pixel_layout(pack_factor), reverse_to=True) + concat = ConCat(pack_factor) - cast = structuring.Cast(lasmim.dw, pixel_layout(pack_factor), reverse_to=True) vtg = VTG(pack_factor) - self.driver = Driver(pack_factor, pads, external_clocking) + self.driver = Driver(pack_factor, pads0, external_clocking) g.add_connection(self.fi, vtg, source_subr=self.fi.timing_subr, sink_ep="timing") - g.add_connection(dma_out, cast) - g.add_connection(cast, vtg, sink_ep="pixels") + g.add_connection(dma_out0, cast0) + g.add_connection(dma_out1, cast1) + g.add_connection(cast0, concat, sink_ep="pix0") + g.add_connection(cast1, concat, sink_ep="pix1") + + g.add_connection(concat, vtg, sink_ep="pixels") + g.add_connection(vtg, self.driver) self.submodules += CompositeActor(g) diff --git a/gateware/hdmi_out/format.py b/gateware/hdmi_out/format.py index bf69ff6b..90a3facb 100644 --- a/gateware/hdmi_out/format.py +++ b/gateware/hdmi_out/format.py @@ -15,10 +15,16 @@ ("y", bpc) ] - def pixel_layout(pack_factor): return [("p"+str(i), pixel_layout_s) for i in range(pack_factor)] +def pixel_layout_c(pack_factor): + r = [] + for i in range(pack_factor): + r.append(("p"+str(i), pixel_layout_s)) + r.append(("q"+str(i), pixel_layout_s)) + return r + bpc_phy = 8 phy_layout_s 
= [ ("cb_cr", bpc_phy), @@ -30,6 +36,7 @@ def phy_layout(pack_factor): r = [("hsync", 1), ("vsync", 1), ("de", 1)] for i in range(pack_factor): r.append(("p"+str(i), phy_layout_s)) + r.append(("q"+str(i), phy_layout_s)) return r @@ -61,6 +68,24 @@ def __init__(self, bus_aw, pack_factor, ndmas=1): def dma_subr(self, i=0): return ["length", "base"+str(i)] +class ConCat(Module): + def __init__(self, pack_factor): + self.pix0 = Sink(pixel_layout(pack_factor)) + self.pix1 = Sink(pixel_layout(pack_factor)) + self.pix = Source(pixel_layout_c(pack_factor)) + self.busy = Signal() + + self.comb += [ + [getattr(getattr(self.pix.payload, p_phy), c).eq(getattr(getattr(self.pix0.payload, p_pixel), c)) + for p_phy,p_pixel in zip(["p"+str(i) for i in range(pack_factor)], ["p"+str(i) for i in range(pack_factor)]) for c in ["y", "cb_cr"]], + [getattr(getattr(self.pix.payload, p_phy), c).eq(getattr(getattr(self.pix1.payload, p_pixel), c)) + for p_phy,p_pixel in zip(["q"+str(i) for i in range(pack_factor)], ["p"+str(i) for i in range(pack_factor)]) for c in ["y", "cb_cr"]], + self.pix0.ack.eq(self.pix.ack & self.pix.stb), + self.pix1.ack.eq(self.pix.ack & self.pix.stb), + self.pix.stb.eq(self.pix0.stb & self.pix1.stb), + self.busy.eq(0) + ] + class VTG(Module): def __init__(self, pack_factor): @@ -75,7 +100,7 @@ def __init__(self, pack_factor): ("vsync_end", _vbits), ("vscan", _vbits)] self.timing = Sink(timing_layout) - self.pixels = Sink(pixel_layout(pack_factor)) + self.pixels = Sink(pixel_layout_c(pack_factor)) self.phy = Source(phy_layout(pack_factor)) self.busy = Signal() @@ -92,11 +117,13 @@ def __init__(self, pack_factor): self.comb += [ active.eq(hactive & vactive), If(active, - [getattr(getattr(self.phy.payload, p), c).eq(getattr(getattr(self.pixels.payload, p), c)[skip:]) - for p in ["p"+str(i) for i in range(pack_factor)] for c in ["y", "cb_cr"]], + [getattr(getattr(self.phy.payload, p_phy), c).eq(getattr(getattr(self.pixels.payload, p_pixel), c)[skip:]) + for 
p_phy,p_pixel in zip(["p"+str(i) for i in range(pack_factor)], ["p"+str(i) for i in range(pack_factor)]) for c in ["y", "cb_cr"]], + [getattr(getattr(self.phy.payload, p_phy), c).eq(getattr(getattr(self.pixels.payload, p_pixel), c)[skip:]) + for p_phy,p_pixel in zip(["q"+str(i) for i in range(pack_factor)], ["q"+str(i) for i in range(pack_factor)]) for c in ["y", "cb_cr"]], self.phy.de.eq(1) ), - self.pixels.ack.eq(self.phy.ack & active) + self.pixels.ack.eq(self.phy.ack & active), ] load_timing = Signal() @@ -139,9 +166,9 @@ def __init__(self, pack_factor): ) self.fsm.act("GENERATE", self.busy.eq(1), - If(~active | self.pixels.stb, + If(~active | (self.pixels.stb), self.phy.stb.eq(1), If(self.phy.ack, generate_en.eq(1)) ), - If(generate_frame_done, NextState("GET_TIMING")) + If(generate_frame_done, NextState("GET_TIMING")) ) diff --git a/gateware/hdmi_out/phy.py b/gateware/hdmi_out/phy.py index 12f54f2b..c343351f 100644 --- a/gateware/hdmi_out/phy.py +++ b/gateware/hdmi_out/phy.py @@ -10,6 +10,10 @@ from gateware.csc.ycbcr2rgb import YCbCr2RGB from gateware.csc.ycbcr422to444 import YCbCr422to444 from gateware.csc.ymodulator import YModulator +from gateware.csc.rgb2rgb16f import RGB2RGB16f +from gateware.csc.rgb16f2rgb import RGB16f2RGB +from gateware.float_arithmetic.floatmult import FloatMultRGB +from gateware.float_arithmetic.floatadd import FloatAddRGB class _FIFO(Module): def __init__(self, pack_factor): @@ -19,8 +23,10 @@ def __init__(self, pack_factor): self.pix_hsync = Signal() self.pix_vsync = Signal() self.pix_de = Signal() - self.pix_y = Signal(bpc_phy) - self.pix_cb_cr = Signal(bpc_phy) + self.pix_y_0 = Signal(bpc_phy) + self.pix_cb_cr_0 = Signal(bpc_phy) + self.pix_y_1 = Signal(bpc_phy) + self.pix_cb_cr_1 = Signal(bpc_phy) ### @@ -49,10 +55,13 @@ def __init__(self, pack_factor): self.pix_de.eq(fifo.dout.de) ] for i in range(pack_factor): - pixel = getattr(fifo.dout, "p"+str(i)) + pixel0 = getattr(fifo.dout, "p"+str(i)) + pixel1 = getattr(fifo.dout, 
"q"+str(i)) self.sync.pix += If(unpack_counter == i, - self.pix_y.eq(pixel.y), - self.pix_cb_cr.eq(pixel.cb_cr) + self.pix_y_0.eq(pixel0.y), + self.pix_cb_cr_0.eq(pixel0.cb_cr), + self.pix_y_1.eq(pixel1.y), + self.pix_cb_cr_1.eq(pixel1.cb_cr) ) self.comb += fifo.re.eq(unpack_counter == (pack_factor - 1)) @@ -179,7 +188,7 @@ def __init__(self, pads, external_clocking): else: self.clock_domains.cd_pix = ClockDomain(reset_less=True) - self.specials += Instance("BUFG", name="hdmi_out_pix_bufg", i_I=external_clocking.pll_clk2, o_O=self.cd_pix.clk) + self.specials += Instance("BUFG", name="hdmi_out_pix_bufg1", i_I=external_clocking.pll_clk2, o_O=self.cd_pix.clk) self.clock_domains.cd_pix2x = ClockDomain(reset_less=True) self.clock_domains.cd_pix10x = ClockDomain(reset_less=True) self.serdesstrobe = Signal() @@ -199,44 +208,137 @@ def __init__(self, pads, external_clocking): i_C1=~ClockSignal("pix"), i_CE=1, i_D0=1, i_D1=0, i_R=0, i_S=0) + self.specials += Instance("OBUFDS", i_I=hdmi_clk_se, o_O=pads.clk_p, o_OB=pads.clk_n) +# self.specials += Instance("OBUFDS", i_I=hdmi_clk_se, +# o_O=pads1.clk_p, o_OB=pads1.clk_n) + class Driver(Module, AutoCSR): - def __init__(self, pack_factor, pads, external_clocking): + def __init__(self, pack_factor, pads0, external_clocking): fifo = _FIFO(pack_factor) self.submodules += fifo self.phy = fifo.phy self.busy = fifo.busy - self.submodules.clocking = _Clocking(pads, external_clocking) + self.submodules.clocking = _Clocking(pads0, external_clocking) +# self.submodules.clocking1 = _Clocking(pads1, self.clocking) de_r = Signal() self.sync.pix += de_r.eq(fifo.pix_de) - chroma_upsampler = YCbCr422to444() - self.submodules += RenameClockDomains(chroma_upsampler, "pix") + chroma_upsampler0 = YCbCr422to444() + chroma_upsampler1 = YCbCr422to444() + self.submodules += RenameClockDomains(chroma_upsampler0, "pix") + self.submodules += RenameClockDomains(chroma_upsampler1, "pix") + self.comb += [ - chroma_upsampler.sink.stb.eq(fifo.pix_de), - 
chroma_upsampler.sink.sop.eq(fifo.pix_de & ~de_r), - chroma_upsampler.sink.y.eq(fifo.pix_y), - chroma_upsampler.sink.cb_cr.eq(fifo.pix_cb_cr) + chroma_upsampler0.sink.stb.eq(fifo.pix_de), + chroma_upsampler0.sink.sop.eq(fifo.pix_de & ~de_r), + chroma_upsampler0.sink.y.eq(fifo.pix_y_0), + chroma_upsampler0.sink.cb_cr.eq(fifo.pix_cb_cr_0), + + chroma_upsampler1.sink.stb.eq(fifo.pix_de), + chroma_upsampler1.sink.sop.eq(fifo.pix_de & ~de_r), + chroma_upsampler1.sink.y.eq(fifo.pix_y_1), + chroma_upsampler1.sink.cb_cr.eq(fifo.pix_cb_cr_1) ] - ycbcr2rgb = YCbCr2RGB() - self.submodules += RenameClockDomains(ycbcr2rgb, "pix") + self.mult_factor0 = CSRStorage(16, reset=14336) # 0.25 + self.mult_factor1 = CSRStorage(16, reset=14336) # 0.25 + + ycbcr2rgb0 = YCbCr2RGB() + ycbcr2rgb1 = YCbCr2RGB() + self.submodules += RenameClockDomains(ycbcr2rgb0, "pix") + self.submodules += RenameClockDomains(ycbcr2rgb1, "pix") + + rgb2rgb16f0 = RGB2RGB16f() + rgb2rgb16f1 = RGB2RGB16f() + self.submodules += RenameClockDomains(rgb2rgb16f0, "pix") + self.submodules += RenameClockDomains(rgb2rgb16f1, "pix") + + rgb16f2rgb0 = RGB16f2RGB() + rgb16f2rgb1 = RGB16f2RGB() + self.submodules += RenameClockDomains(rgb16f2rgb0, "pix") + self.submodules += RenameClockDomains(rgb16f2rgb1, "pix") + + floatmult0 = FloatMultRGB() + floatmult1 = FloatMultRGB() + self.submodules += RenameClockDomains(floatmult0, "pix") + self.submodules += RenameClockDomains(floatmult1, "pix") + + floatadd0 = FloatAddRGB() + floatadd1 = FloatAddRGB() + self.submodules += RenameClockDomains(floatadd0, "pix") + self.submodules += RenameClockDomains(floatadd1, "pix") + self.comb += [ - Record.connect(chroma_upsampler.source, ycbcr2rgb.sink), - ycbcr2rgb.source.ack.eq(1) + + # Input0 + Record.connect(chroma_upsampler0.source, ycbcr2rgb0.sink), + Record.connect(ycbcr2rgb0.source, rgb2rgb16f0.sink), + + floatmult0.sink.r1.eq(rgb2rgb16f0.source.rf), + floatmult0.sink.g1.eq(rgb2rgb16f0.source.gf), + 
floatmult0.sink.b1.eq(rgb2rgb16f0.source.bf), + floatmult0.sink.r2.eq(self.mult_factor0.storage), + floatmult0.sink.g2.eq(self.mult_factor0.storage), + floatmult0.sink.b2.eq(self.mult_factor0.storage), + floatmult0.sink.stb.eq(rgb2rgb16f0.source.stb), + rgb2rgb16f0.source.ack.eq(floatmult0.sink.ack), + floatmult0.sink.sop.eq(rgb2rgb16f0.source.sop), + floatmult0.sink.eop.eq(rgb2rgb16f0.source.eop), + + # Input1 + Record.connect(chroma_upsampler1.source, ycbcr2rgb1.sink), + Record.connect(ycbcr2rgb1.source, rgb2rgb16f1.sink), + + floatmult1.sink.r1.eq(rgb2rgb16f1.source.rf), + floatmult1.sink.g1.eq(rgb2rgb16f1.source.gf), + floatmult1.sink.b1.eq(rgb2rgb16f1.source.bf), + floatmult1.sink.r2.eq(self.mult_factor1.storage), + floatmult1.sink.g2.eq(self.mult_factor1.storage), + floatmult1.sink.b2.eq(self.mult_factor1.storage), + + floatmult1.sink.stb.eq(rgb2rgb16f1.source.stb), + rgb2rgb16f1.source.ack.eq(floatmult1.sink.ack), + floatmult1.sink.sop.eq(rgb2rgb16f1.source.sop), + floatmult1.sink.eop.eq(rgb2rgb16f1.source.eop), + + # Mult output of both inputs now connected + floatadd0.sink.r1.eq(floatmult0.source.rf), + floatadd0.sink.g1.eq(floatmult0.source.gf), + floatadd0.sink.b1.eq(floatmult0.source.bf), + floatadd0.sink.r2.eq(floatmult1.source.rf), + floatadd0.sink.g2.eq(floatmult1.source.gf), + floatadd0.sink.b2.eq(floatmult1.source.bf), + + floatadd0.sink.stb.eq(floatmult0.source.stb & floatmult1.source.stb ), + floatadd0.sink.sop.eq(floatmult0.source.sop & floatmult1.source.sop ), + floatadd0.sink.eop.eq(floatmult0.source.eop & floatmult1.source.eop ), + floatmult0.source.ack.eq(floatadd0.sink.ack & floatadd0.sink.stb), + floatmult1.source.ack.eq(floatadd0.sink.ack & floatadd0.sink.stb), + + # Other input for floatadd setup in opsis_video.py + + Record.connect(floatadd0.source, rgb16f2rgb0.sink), + rgb16f2rgb0.source.ack.eq(1), ] # XXX need clean up de = fifo.pix_de hsync = fifo.pix_hsync vsync = fifo.pix_vsync - for i in range(chroma_upsampler.latency + - 
ycbcr2rgb.latency): + for i in range(chroma_upsampler0.latency + + ycbcr2rgb0.latency + + rgb2rgb16f0.latency + + floatmult0.latency + + floatadd0.latency + + rgb16f2rgb0.latency + ): + next_de = Signal() next_vsync = Signal() next_hsync = Signal() @@ -249,12 +351,23 @@ def __init__(self, pack_factor, pads, external_clocking): vsync = next_vsync hsync = next_hsync - self.submodules.hdmi_phy = hdmi.PHY(self.clocking.serdesstrobe, pads) + self.submodules.hdmi_phy0 = hdmi.PHY(self.clocking.serdesstrobe, pads0) +# self.submodules.hdmi_phy1 = hdmi.PHY(self.clocking.serdesstrobe, pads1) + self.comb += [ - self.hdmi_phy.hsync.eq(hsync), - self.hdmi_phy.vsync.eq(vsync), - self.hdmi_phy.de.eq(de), - self.hdmi_phy.r.eq(ycbcr2rgb.source.r), - self.hdmi_phy.g.eq(ycbcr2rgb.source.g), - self.hdmi_phy.b.eq(ycbcr2rgb.source.b) + self.hdmi_phy0.hsync.eq(hsync), + self.hdmi_phy0.vsync.eq(vsync), + self.hdmi_phy0.de.eq(de), + +# self.hdmi_phy1.hsync.eq(hsync), +# self.hdmi_phy1.vsync.eq(vsync), +# self.hdmi_phy1.de.eq(de), + + self.hdmi_phy0.r.eq(rgb16f2rgb0.source.r), + self.hdmi_phy0.g.eq(rgb16f2rgb0.source.g), + self.hdmi_phy0.b.eq(rgb16f2rgb0.source.b), + +# self.hdmi_phy1.r.eq(rgb16f2rgb1.source.r), +# self.hdmi_phy1.g.eq(rgb16f2rgb1.source.g), +# self.hdmi_phy1.b.eq(rgb16f2rgb1.source.b), ] diff --git a/targets/opsis_base.py b/targets/opsis_base.py index aea59311..03d670fe 100644 --- a/targets/opsis_base.py +++ b/targets/opsis_base.py @@ -27,6 +27,7 @@ from gateware import i2c from gateware import i2c_hack from gateware import platform_info +from gateware.float_arithmetic import floatmult from targets.common import * @@ -139,6 +140,7 @@ class BaseSoC(SDRAMSoC): "spiflash", "ddrphy", "dna", + "floatmult", "git_info", "platform_info", "fx2_reset", diff --git a/targets/opsis_video.py b/targets/opsis_video.py index 0ac62222..f42a09d8 100644 --- a/targets/opsis_video.py +++ b/targets/opsis_video.py @@ -17,7 +17,7 @@ class CustomVideoMixerSoC(base): "hdmi_in1_edid_mem", ) 
csr_map_update(base.csr_map, csr_peripherals) - + interrupt_map = { "hdmi_in0": 3, "hdmi_in1": 4, @@ -36,31 +36,58 @@ def __init__(self, platform, **kwargs): fifo_depth=512) self.submodules.hdmi_out0 = HDMIOut( platform.request("hdmi_out", 0), + self.sdram.crossbar.get_master(), self.sdram.crossbar.get_master()) +# self.sdram.crossbar.get_master()) # Share clocking with hdmi_out0 since no PLL_ADV left. - self.submodules.hdmi_out1 = HDMIOut( - platform.request("hdmi_out", 1), - self.sdram.crossbar.get_master(), - self.hdmi_out0.driver.clocking) +# self.submodules.hdmi_out1 = HDMIOut( +# platform.request("hdmi_out", 1), +# self.sdram.crossbar.get_master(), +# self.hdmi_out0.driver.clocking) + # Video Mixer Equation Implemented + ''' + self.comb += [ + + self.hdmi_out0.driver.floatadd.sink.b1.eq(self.hdmi_out0.driver.floatmult.source.bf), + self.hdmi_out0.driver.floatadd.sink.g1.eq(self.hdmi_out0.driver.floatmult.source.gf), + self.hdmi_out0.driver.floatadd.sink.r1.eq(self.hdmi_out0.driver.floatmult.source.rf), + self.hdmi_out0.driver.floatadd.sink.b2.eq(self.hdmi_out1.driver.floatmult.source.bf), + self.hdmi_out0.driver.floatadd.sink.g2.eq(self.hdmi_out1.driver.floatmult.source.gf), + self.hdmi_out0.driver.floatadd.sink.r2.eq(self.hdmi_out1.driver.floatmult.source.rf), + + self.hdmi_out1.driver.floatadd.sink.b1.eq(self.hdmi_out0.driver.floatmult.source.bf), + self.hdmi_out1.driver.floatadd.sink.g1.eq(self.hdmi_out0.driver.floatmult.source.gf), + self.hdmi_out1.driver.floatadd.sink.r1.eq(self.hdmi_out0.driver.floatmult.source.rf), + self.hdmi_out1.driver.floatadd.sink.b2.eq(self.hdmi_out1.driver.floatmult.source.bf), + self.hdmi_out1.driver.floatadd.sink.g2.eq(self.hdmi_out1.driver.floatmult.source.gf), + self.hdmi_out1.driver.floatadd.sink.r2.eq(self.hdmi_out1.driver.floatmult.source.rf), + + self.hdmi_out0.driver.floatadd.sink.stb.eq(self.hdmi_out0.driver.floatmult.source.stb & self.hdmi_out1.driver.floatmult.source.stb ), + 
self.hdmi_out0.driver.floatadd.sink.sop.eq(self.hdmi_out0.driver.floatmult.source.sop & self.hdmi_out1.driver.floatmult.source.sop ), + self.hdmi_out0.driver.floatadd.sink.eop.eq(self.hdmi_out0.driver.floatmult.source.eop & self.hdmi_out1.driver.floatmult.source.eop ), + + self.hdmi_out1.driver.floatadd.sink.stb.eq(self.hdmi_out0.driver.floatmult.source.stb & self.hdmi_out1.driver.floatmult.source.stb ), + self.hdmi_out1.driver.floatadd.sink.sop.eq(self.hdmi_out0.driver.floatmult.source.sop & self.hdmi_out1.driver.floatmult.source.sop ), + self.hdmi_out1.driver.floatadd.sink.eop.eq(self.hdmi_out0.driver.floatmult.source.eop & self.hdmi_out1.driver.floatmult.source.eop ), + + self.hdmi_out0.driver.floatmult.source.ack.eq(self.hdmi_out0.driver.floatadd.sink.ack & self.hdmi_out0.driver.floatadd.sink.stb), + self.hdmi_out1.driver.floatmult.source.ack.eq(self.hdmi_out0.driver.floatadd.sink.ack & self.hdmi_out0.driver.floatadd.sink.stb), + ] + ''' + # all PLL_ADV are used: router needs help... platform.add_platform_command("""INST PLL_ADV LOC=PLL_ADV_X0Y0;""") # FIXME: Fix the HDMI out so this can be removed. platform.add_platform_command( """PIN "hdmi_out_pix_bufg.O" CLOCK_DEDICATED_ROUTE = FALSE;""") - platform.add_platform_command( - """PIN "hdmi_out_pix_bufg_1.O" CLOCK_DEDICATED_ROUTE = FALSE;""") platform.add_platform_command( """ NET "{pix0_clk}" TNM_NET = "GRPpix0_clk"; -NET "{pix1_clk}" TNM_NET = "GRPpix1_clk"; TIMESPEC "TSise_sucks7" = FROM "GRPpix0_clk" TO "GRPsys_clk" TIG; TIMESPEC "TSise_sucks8" = FROM "GRPsys_clk" TO "GRPpix0_clk" TIG; -TIMESPEC "TSise_sucks9" = FROM "GRPpix1_clk" TO "GRPsys_clk" TIG; -TIMESPEC "TSise_sucks10" = FROM "GRPsys_clk" TO "GRPpix1_clk" TIG; """, pix0_clk=self.hdmi_out0.driver.clocking.cd_pix.clk, - pix1_clk=self.hdmi_out1.driver.clocking.cd_pix.clk, ) for k, v in sorted(platform.hdmi_infos.items()):