ref: 440860780f767ae6af42ae2c9f49bcca46b6ceeb
dir: /hj264.c/
#define MINIH264_IMPLEMENTATION #define H264E_MAX_THREADS 7 #ifdef __amd64__ #define memcpy memcpyf #endif #include "minih264e.h" #include <thread.h> #include <bio.h> #include <draw.h> #include <memdraw.h> #include <tos.h> void npe_nsleep(uvlong ns); #define max(a,b) ((a)>(b)?(a):(b)) #define min(a,b) ((a)<(b)?(a):(b)) #define clp(v,a,b) min((b), max((v),(a))) #define align(p,a) (void*)((((uintptr)p - 1) | (a-1)) + 1) enum { Align = 64, Maxquality = 10, Gop = 20, }; typedef struct Hjob Hjob; typedef struct Hjthread Hjthread; typedef struct Hj264 Hj264; typedef struct Img Img; struct Hjob { void (*run)(void *); void *arg; }; struct Hjthread { int id; Channel *job; Channel *done; }; struct Hj264 { H264E_persist_t *persist; H264E_scratch_t *scratch; H264E_run_param_t rp; H264E_io_yuv_t yuv; Biobuf out; Channel *frame; Hjthread threads[H264E_MAX_THREADS]; Hjob jobs[H264E_MAX_THREADS]; int nthreads; u8int buf[1]; }; struct Img { int w; int h; u8int bgrx[]; }; static void xrgb2yuv(u8int *bgrx, int w, int h, H264E_io_yuv_t *io) { u8int *py, *pu, *pv; int x, y, r, g, b; py = io->yuv[0]; pu = io->yuv[1]; pv = io->yuv[2]; for(y = 0; y < h;){ for(x = 0; x < w;){ b = bgrx[0]; g = bgrx[1]; r = bgrx[2]; bgrx += 4; #define YY ((( 77*r + 150*g + 29*b + 128) >> 8) + 0) #define UU (((-43*r - 84*g + 127*b + 128) >> 8) + 128) #define VV (((127*r - 106*g - 21*b + 128) >> 8) + 128) py[x] = YY; pu[x/2] = UU; pv[x/2] = VV; x++; b = bgrx[0]; g = bgrx[1]; r = bgrx[2]; bgrx += 4; py[x] = YY; x++; } py += io->stride[0]; y++; for(x = 0; x < w;){ b = bgrx[0]; g = bgrx[1]; r = bgrx[2]; bgrx += 4; py[x] = YY; x++; #undef YY #undef UU #undef VV } py += io->stride[0]; pu += io->stride[1]; pv += io->stride[2]; y++; } } #pragma varargck type "ℏ" int static int hjerror(Fmt *f) { char *s; int e; s = nil; e = va_arg(f->args, int); switch(e){ case H264E_STATUS_SUCCESS: s = "success"; break; case H264E_STATUS_BAD_ARGUMENT: s = "bad argument"; break; case H264E_STATUS_BAD_PARAMETER: s = "bad parameter"; break; case H264E_STATUS_BAD_FRAME_TYPE: s = "bad frame type"; break; case H264E_STATUS_SIZE_NOT_MULTIPLE_16: s = "size not multiple of 16"; break; case H264E_STATUS_SIZE_NOT_MULTIPLE_2: s = "size not multiple of 2"; break; case H264E_STATUS_BAD_LUMA_ALIGN: s = "bad luma alignment"; break; case H264E_STATUS_BAD_LUMA_STRIDE: s = "bad luma stride"; break; case H264E_STATUS_BAD_CHROMA_ALIGN: s = "bad chroma alignment"; break; case H264E_STATUS_BAD_CHROMA_STRIDE: s = "bad chroma stride"; break; } return s == nil ? fmtprint(f, "error %d", e) : fmtprint(f, "%s", s); } static void threadf(void *p) { Hjthread *t; Hjob *j; Channel *job, *done; t = p; threadsetname("hj264/%d", t->id); job = t->job; done = t->done; for(sendp(done, nil); (j = recvp(job)) != nil; sendp(done, j)) j->run(j->arg); chanfree(done); chanfree(job); threadexits(nil); } static void hjobsrun(void *p, void (*run)(void *), void **arg, int njob) { int n, t; Hj264 *h; Hjob *j; h = p; for(n = 0; n < njob;){ for(t = 0; t < h->nthreads && n < njob; t++, n++){ j = &h->jobs[t]; j->run = run; j->arg = arg[n]; sendp(h->threads[t].job, j); } for(t--; t >= 0; t--) recvp(h->threads[t].done); } } static int hj264_encode(Hj264 *h, u8int **data, int *sz) { int e; if((e = H264E_encode(h->persist, h->scratch, &h->rp, &h->yuv, data, sz)) != 0){ werrstr("H264E_encode: %ℏ", e); return -1; } return 0; } static Hj264 * hj264new(int nthreads, int denoise, int kbps, int ww, int hh) { int i, e, szscratch, szpersist, szyuv; H264E_create_param_t cp; Hjthread *t; u8int *p; Hj264 *h; nthreads = clp(nthreads, 1, H264E_MAX_THREADS); /* YUV logic requires alignment, allow height to be different (pad it) */ hh = ((hh-1) | 15) + 1; memset(&cp, 0, sizeof(cp)); cp.num_layers = 1; cp.gop = Gop; cp.max_threads = nthreads; cp.temporal_denoise_flag = denoise; cp.max_long_term_reference_frames = MAX_LONG_TERM_FRAMES; cp.vbv_size_bytes = kbps/1000*8/2; /* 2 seconds */ cp.width = ww; cp.height = hh; if((e = H264E_sizeof(&cp, &szpersist, &szscratch)) != 0){ werrstr("H264E_sizeof: %ℏ", e); return nil; } /* FIXME not padding width yet, so it still has to be multiple of 16 */ /* once we do that, put this line to where "hh" is aligned */ ww = ((ww-1) | 15) + 1; szyuv = ww*hh*3/2; if((h = calloc(1, sizeof(*h) + Align+szyuv + Align+szpersist + Align+szscratch)) == nil) return nil; p = align(h->buf, Align); h->yuv.yuv[0] = p; h->yuv.stride[0] = ww; h->yuv.yuv[1] = p + ww*hh; h->yuv.stride[1] = ww/2; h->yuv.yuv[2] = p + ww*hh*5/4; h->yuv.stride[2] = ww/2; h->persist = align(p+szyuv, Align); h->scratch = align(h->persist+szpersist, Align); cp.token = h; cp.run_func_in_thread = hjobsrun; if((e = H264E_init(h->persist, &cp)) != 0){ werrstr("H264E_init: %ℏ", e); return nil; } h->nthreads = nthreads; for(i = 0; i < nthreads; i++){ t = &h->threads[i]; t->id = i; t->job = chancreate(sizeof(void*), 0); t->done = chancreate(sizeof(void*), 0); proccreate(threadf, t, mainstacksize); recvp(t->done); } return h; } static void hj264free(Hj264 *h) { int i; for(i = 0; i < h->nthreads; i++){ chanclose(h->threads[i].done); chanclose(h->threads[i].job); } free(h); } static uvlong nanosec(void) { static uvlong fasthz, xstart; uvlong x, div; if(fasthz == ~0ULL) return nsec() - xstart; if(fasthz == 0){ if(_tos->cyclefreq){ cycles(&xstart); fasthz = _tos->cyclefreq; } else { xstart = nsec(); fasthz = ~0ULL; fprint(2, "cyclefreq not available, falling back to nsec()\n"); fprint(2, "you might want to disable aux/timesync\n"); return 0; } } cycles(&x); x -= xstart; /* this is ugly */ for(div = 1000000000ULL; x < 0x1999999999999999ULL && div > 1 ; div /= 10ULL, x *= 10ULL); return x / (fasthz / div); } static void encthread(void *p) { u8int *data; Img *img; Hj264 *h; int sz; h = p; for(;;){ if((img = recvp(h->frame)) == nil) break; xrgb2yuv(img->bgrx, img->w, img->h, &h->yuv); free(img); if(hj264_encode(h, &data, &sz) != 0) sysfatal("hj264_encode: %r"); if(Bwrite(&h->out, data, sz) != sz) break; } Bflush(&h->out); hj264free(h); threadexits(nil); } static Img * imgread(int f, int w, int h) { int r, n, e; Img *i; e = w*h*4; i = malloc(sizeof(*i) + e); i->w = w; i->h = h; for(n = 0; n < e; n += r){ if((r = pread(f, i->bgrx+n, e-n, n+5*12)) <= 0){ free(i); return nil; } } return i; } static void usage(void) { fprint(2, "usage: %s [-d] [-f FPS] [-n THREADS] [-k KBPS] [-q 0…10] [-Q QP]\n", argv0); threadexitsall("usage"); } int main(int argc, char **argv) { int nthreads, fps, kbps, denoise, quality, qp; uvlong start, end, fstart, fend; int ww, hh, in, nframes; Memimage *im; Img *img; Hj264 *h; char *s; /* use NPROC-1 threads by default */ nthreads = ((s = getenv("NPROC")) != nil) ? atoi(s)-1 : 1; denoise = 0; quality = 10; kbps = 0; fps = 30; qp = 33; ARGBEGIN{ case 'd': denoise++; break; case 'f': fps = atoi(EARGF(usage())); break; case 'k': kbps = atoi(EARGF(usage())); break; case 'n': nthreads = atoi(EARGF(usage())); break; case 'q': quality = atoi(EARGF(usage())); break; case 'Q': qp = atoi(EARGF(usage())); break; default: usage(); }ARGEND if(quality > Maxquality) quality = Maxquality; if(kbps < 0) kbps = 0; if(argc < 1) usage(); if((in = open(*argv, OREAD)) < 0) sysfatal("input: %r"); fmtinstall(L'ℏ', hjerror); memimageinit(); if((im = readmemimage(in)) == nil) sysfatal("image: %r"); ww = Dx(im->r); hh = Dy(im->r); freememimage(im); if((h = hj264new(nthreads, denoise, kbps, ww, hh)) == nil) sysfatal("hj264new: %r"); if(Binit(&h->out, 1, OWRITE) < 0) sysfatal("Binit failed: %r"); h->frame = chancreate(sizeof(void*), 1); /* FIXME this is wrong as the encoder might be too late */ /* FIXME how about changing these on the fly? */ h->rp.encode_speed = Maxquality - quality; h->rp.qp_min = h->rp.qp_max = qp; if(kbps > 0){ h->rp.qp_min = 10; h->rp.qp_max = 50; h->rp.desired_frame_bytes = kbps*1000/8/fps; } proccreate(encthread, h, mainstacksize); start = nanosec(); for(nframes = 0;; nframes++){ fstart = nanosec(); if((img = imgread(in, ww, hh)) == nil) break; if(sendp(h->frame, img) != 1) break; fend = nanosec(); if(1000000000ULL/fps > (fend - fstart)) npe_nsleep(1000000000ULL/fps - (fend - fstart)); /* FIXME make a graceful shutdown on a note */ if(nanosec() - start > 10000000000ULL) break; } end = nanosec(); fprint(2, "%d fps\n", (int)(nframes / ((end - start)/1000000000ULL))); chanclose(h->frame); threadexitsall(nil); return 0; }