summaryrefslogtreecommitdiff
path: root/glcapture.c
blob: c9dcd916b2675695be3a6a6ec2438033f60053d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
/* gcc -std=c99 -fPIC -shared -Wl,-soname,glcapture.so glcapture.c -lasound -o glcapture.so
 * gcc -m32 -std=c99 -fPIC -shared -Wl,-soname,glcapture.so glcapture.c -lasound -o glcapture.so (for 32bit)
 *
 * Capture OpenGL framebuffer, ALSA audio and push them through named pipe
 * Usage: LD_PRELOAD="/path/to/glcapture.so" ./program
 *
 * https://github.com/Cloudef/FFmpeg/tree/rawmux
 * ^ Compile this branch of ffmpeg to get rawmux decoder
 * You can test that it works by doing ./ffplay /tmp/glcapture.fifo
 *
 * Make sure you increase your maximum pipe size (/proc/sys/fs/pipe-max-size) to a minimum of
 * (FPS / 4) * ((width * height * components) + 13), where components is 3 on OpenGL and 4 on OpenGL ES.
 *
 * If you get xruns from alsa, consider increasing your audio buffer size.
 */

/**
 * TODO:
 * - Consider alternative such as using DRM/VAAPI to encode directly to pipe
 * - NVENC also exists for nv blob, however seems to not have public GL interop
 */

#define _GNU_SOURCE
#include <dlfcn.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <signal.h>
#include <unistd.h>
#include <assert.h>
#include <time.h>
#include <err.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>

#include <GL/glx.h>
#include <EGL/egl.h>
#include <alsa/asoundlib.h>

// Some tunables
// XXX: Make these configurable

// Number of pixel buffer objects in the capture ring (size of the pbo array in struct gl).
// Use any amount you want as long as you have the vram for it
// If you get warning of write_frame taking time, even when not reading the pipe, increase this
#define NUM_PBOS 20

// Target framerate for the video stream
// Written into the video frame info, used to size the pipe and, with DROP_FRAMES, to throttle capture.
static uint32_t FPS = 60;

// Drop frames if going over target framerate
// Set this to false if you want frame perfect capture
// If your target framerate is lower than game framerate set this to true (i.e. you want to record at lower fps)
static bool DROP_FRAMES = true;

// Multiplier for system clock (MONOTONIC, RAW) can be used to make recordings of replays smoother (or speed hack)
// Applied in get_fake_time_ns() to the time elapsed since the first call on each thread.
static double SPEED_HACK = 1.0;

// If your video is upside down set this to false
// When true, captured rows are swapped top <-> bottom on the CPU before being written (see flip_pixels_if_needed).
static bool FLIP_VIDEO = true;

// Path for the fifo where glcapture will output the rawmux data
// Any existing file at this path is removed before the fifo is created.
static const char *FIFO_PATH = "/tmp/glcapture.fifo";

// Streams multiplexed into the fifo.
// The value is used as an index into per-stream tables and is written as the first byte of every frame packet.
// STREAM_LAST is not a stream, it is the number of streams.
enum stream {
   STREAM_VIDEO,
   STREAM_AUDIO,
   STREAM_LAST,
};

// Set to false to disable stream
// Designated initializers keep each flag tied to its stream even if the enum is reordered.
static const bool ENABLED_STREAMS[STREAM_LAST] = {
   [STREAM_VIDEO] = true,
   [STREAM_AUDIO] = true,
};

// Number of elements of a real array. Not valid for pointers or array parameters.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

// Logging helpers that prefix every message with "glcapture: ".
// The format (x / y) must be a string literal because the prefix is glued on by literal concatenation.
// WARN / ERR go through warn() / err(), WARNX / ERRX through warnx() / errx() (see err.h).
// ERR and ERRX take the exit code as their first argument.
#define WARN(x, ...) do { warn("glcapture: "x, ##__VA_ARGS__); } while (0)
#define WARNX(x, ...) do { warnx("glcapture: "x, ##__VA_ARGS__); } while (0)
#define ERRX(x, y, ...) do { errx(x, "glcapture: "y, ##__VA_ARGS__); } while (0)
#define ERR(x, y, ...) do { err(x, "glcapture: "y, ##__VA_ARGS__); } while (0)

// Like WARNX, but each expansion site prints (and evaluates its arguments) only the first time it is reached.
// The guard is a plain static bool, so it is shared by all threads without any synchronization.
#define WARN_ONCE(x, ...) do { static bool o = false; if (!o) { WARNX(x, ##__VA_ARGS__); o = true; } } while (0)

// "entrypoints" exposed to hooks.h
// Declared ahead of the hooks.h include that follows; the functions are defined further down in this file.
// NOTE(review): presumably hooks.h calls these from its intercepted GL / ALSA / clock functions — confirm in hooks.h.
static void swap_buffers(void);
static void alsa_writei(snd_pcm_t *pcm, const void *buffer, const snd_pcm_uframes_t size, const char *caller);
static uint64_t get_fake_time_ns(void);
// Per-thread record of 8 integers that flip_pixels_if_needed() compares against the viewport.
// It is never written in this file; presumably hooks.h stores the arguments of the last framebuffer blit here — confirm.
static __thread GLint LAST_FRAMEBUFFER_BLIT[8];

#include "hooks.h"
#include "glwrangle.h"

// One slot of the pixel buffer ring: filled by glReadPixels in capture_frame_pbo() and mapped / written out on a later call.
struct pbo {
   uint64_t ts; // timestamp handed to capture_frame_pbo() when the slot was filled
   uint32_t width, height; // viewport size (view[2], view[3]) when the slot was filled
   GLuint obj; // GL buffer object name, 0 until created
   bool written; // true while the slot holds a frame that has not been written to the fifo yet
};

// Capture state; swap_buffers() keeps one instance per thread.
struct gl {
   struct pbo pbo[NUM_PBOS]; // ring of readback slots
   uint8_t active; // index into pbo[], advanced modulo NUM_PBOS after every captured frame
};

// Describes one chunk of data handed to write_data(); `stream` selects which union member is meaningful.
struct frame_info {
   union {
      // Valid when stream == STREAM_VIDEO
      struct {
         uint32_t width, height, fps;
      } video;
      // Valid when stream == STREAM_AUDIO
      struct {
         uint32_t rate;
         uint8_t channels;
      } audio;
   };

   // Format name written into the rawmux header.
   // In the copy kept inside struct fifo, NULL means the stream has not produced data since the last reset.
   const char *format;
   uint64_t ts; // timestamp in nanoseconds, taken from get_time_ns() by the producers in this file
   enum stream stream;
};

// State of the output fifo; cleared wholesale by reset_fifo().
struct fifo {
   // Last frame_info seen for each stream, used to build the header and to detect parameter changes
   struct {
      struct frame_info info;
   } stream[STREAM_LAST];

   FILE *file; // stdio stream created with fdopen() on fd, used for all writes
   uint64_t base; // get_time_ns() value taken right after the header was written; origin for pts values
   size_t size; // largest pipe size successfully requested with F_SETPIPE_SZ so far
   int fd; // descriptor of the opened fifo, -1 while not open
   bool created; // true once mkfifo(FIFO_PATH) succeeded
};

// Growable heap buffer managed by buffer_resize().
struct buffer {
   void *data; // heap block, NULL until the first non-zero resize
   size_t size, allocated; // size: bytes currently in use, allocated: capacity of data (only ever grows)
};

// Executes the statement(s) x and prints a warning when they took warn_ms milliseconds or longer.
// x is pasted inside a do { } while (0) block, so anything it declares is scoped to that block,
// and it must not contain commas outside of parentheses (they would split the macro arguments).
// warn_ms is cast to double so an integer threshold still matches the %.0f conversion.
#define PROFILE(x, warn_ms, name) do { \
   const uint64_t start = get_time_ns(); \
   x; \
   const double ms = (get_time_ns() - start) / 1e6; \
   if (ms >= (double)(warn_ms)) WARNX("WARNING: %s took %.2f ms (>=%.0fms)", name, ms, (double)(warn_ms)); \
} while (0)

// Makes sure `buffer` can hold at least `size` bytes and records `size` as its used length.
// The allocation only grows; shrinking just lowers buffer->size and keeps the block.
// Terminates the process through ERR() when the allocation fails.
static void
buffer_resize(struct buffer *buffer, const size_t size)
{
   if (buffer->allocated < size) {
      // Keep the old pointer until realloc succeeded, so the failure message shows the real block instead of NULL
      void *data = realloc(buffer->data, size);

      if (!data)
         ERR(EXIT_FAILURE, "realloc(%p, %zu)", buffer->data, size);

      buffer->data = data;
      buffer->allocated = size;
   }

   buffer->size = size;
}

// Returns the CLOCK_MONOTONIC time in nanoseconds.
// Goes through _clock_gettime, which HOOK() makes available (presumably the real, un-intercepted clock_gettime — see hooks.h).
static uint64_t
get_time_ns(void)
{
   struct timespec now;
   HOOK(clock_gettime);
   _clock_gettime(CLOCK_MONOTONIC, &now);

   const uint64_t seconds = (uint64_t)now.tv_sec;
   const uint64_t nanoseconds = (uint64_t)now.tv_nsec;
   return seconds * (uint64_t)1e9 + nanoseconds;
}

static void
reset_fifo(struct fifo *fifo)
{
   close(fifo->fd);
   memset(fifo, 0sizeof(*fifo));
   fifo->fd = -1;
   WARNX("reseting fifo");
}

static bool
write_rawmux_header(struct fifo *fifo)
{
   uint8_t header[255] = { 'r''a''w''m''u''x' };

   size_t variable_sz = 0;
   for (enum stream i = 0; i < STREAM_LAST; ++i)
      variable_sz += (fifo->stream[i].info.format ? strlen(fifo->stream[i].info.format) : 0);

   if (variable_sz + 33 > sizeof(header)) {
      warnx("something went wrong");
      reset_fifo(fifo);
      return false;
   }

   uint8_t *p = header + 6;
   memcpy(p, (uint8_t[]){1}sizeof(uint8_t)); p += 1;

   if (fifo->stream[STREAM_VIDEO].info.format) {
      const struct frame_info *info = &fifo->stream[STREAM_VIDEO].info;
      memcpy(p, (uint8_t[]){1}sizeof(uint8_t)); p += 1;
      memcpy(p, info->format, strlen(info->format)); p += strlen(info->format) + 1;
      memcpy(p, (uint32_t[]){1}sizeof(uint32_t)); p += 4;
      memcpy(p, (uint32_t[]){info->video.fps * 1000}sizeof(uint32_t)); p += 4;
      memcpy(p, &info->video.width, sizeof(uint32_t)); p += 4;
      memcpy(p, &info->video.height, sizeof(uint32_t)); p += 4;
   }

   if (fifo->stream[STREAM_AUDIO].info.format) {
      const struct frame_info *info = &fifo->stream[STREAM_AUDIO].info;
      memcpy(p, (uint8_t[]){2}sizeof(uint8_t)); p += 1;
      memcpy(p, info->format, strlen(info->format)); p += strlen(info->format) + 1;
      memcpy(p, &info->audio.rate, sizeof(info->audio.rate)); p += 4;
      memcpy(p, &info->audio.channels, sizeof(info->audio.channels)); p += 1;
   }

   return (fwrite(header, 1, (p + 1) - header, fifo->file) == (size_t)((p + 1) - header));
}

// True when two format names differ. Compares contents, not addresses; NULL only equals NULL.
static bool
stream_format_differs(const char *a, const char *b)
{
   if (a == b)
      return false;

   if (!a || !b)
      return true;

   return (strcmp(a, b) != 0);
}

// Tells whether the parameters that end up in the rawmux header differ between two infos of the same stream.
// Video compares format, width and height. Audio compares format, rate and channels.
// Timestamps (and the video fps field) are not part of the comparison.
static bool
stream_info_changed(const struct frame_info *current, const struct frame_info *last)
{
   assert(current->stream == last->stream);

   if (stream_format_differs(current->format, last->format))
      return true;

   if (current->stream == STREAM_VIDEO) {
      return (current->video.width != last->video.width ||
              current->video.height != last->video.height);
   }

   return (current->audio.rate != last->audio.rate ||
           current->audio.channels != last->audio.channels);
}

static bool
check_and_prepare_stream(struct fifo *fifo, const struct frame_info *info)
{
   if (!ENABLED_STREAMS[info->stream])
      return false;

   if (fifo->stream[info->stream].info.format && stream_info_changed(info, &fifo->stream[info->stream].info)) {
      WARNX("stream information has changed");
      reset_fifo(fifo);
   }

   fifo->stream[info->stream].info = *info;

   if (!fifo->created) {
      remove(FIFO_PATH);

      if (!(fifo->created = !mkfifo(FIFO_PATH, 0666)))
         return false;

      fifo->created = true;
   }

   if (fifo->fd < 0) {
      signal(SIGPIPESIG_IGN);

      if ((fifo->fd = open(FIFO_PATH, O_WRONLY | O_NONBLOCK | O_CLOEXEC)) < 0)
         return false;

      // We will use fwrite instead of write for buffered writes.
      // Which will be more stable, since audio/video data isn't actually that large per frame.
      // We also avoid calling to kernel each call.
      fifo->file = fdopen(fifo->fd, "wb");
      assert(fifo->file);

      const int flags = fcntl(fifo->fd, F_GETFL);
      fcntl(fifo->fd, F_SETFL, flags & ~O_NONBLOCK);
      WARNX("stream ready, writing headers");

      if (!write_rawmux_header(fifo))
         return false;

      fifo->base = get_time_ns();
   }

   return true;
}

static void
write_data_unsafe(struct fifo *fifo, const struct frame_info *info, const void *buffer, const size_t size)
{
   if (!check_and_prepare_stream(fifo, info))
      return;

   const uint64_t ts = (fifo->base > info->ts ? fifo->base : info->ts);
   const uint64_t den[STREAM_LAST] = { 1e61e9 };
   const uint64_t rate = (info->stream == STREAM_VIDEO ? info->video.fps : info->audio.rate);
   const uint64_t pts = (ts - fifo->base) / (den[info->stream] / rate);

#if 0
   WARNX("PTS: (%u) %llu", info->stream, pts);
#endif

   uint8_t frame[] = {
      info->stream,
      0000,
      00000000
   };

   memcpy(frame + 1, (uint32_t[]){size}sizeof(uint32_t));
   memcpy(frame + 1 + 4, (uint64_t[]){pts}sizeof(uint64_t));

   {
      const size_t pipe_sz = (FPS / 4) * (size + sizeof(frame));

      if (fifo->size < pipe_sz) {
         if (fcntl(fifo->fd, F_SETPIPE_SZ, pipe_sz) == -1) {
            WARN("fcntl(F_SETPIPE_SZ, %zu) (%u)", pipe_sz, info->stream);
            reset_fifo(fifo);
            return;
         }

         fifo->size = pipe_sz;
         setvbuf(fifo->file, NULL_IOFBF, fifo->size / 8);
      }
   }

   errno = 0;
   size_t ret;
   if ((ret = fwrite(frame, 1sizeof(frame), fifo->file) != sizeof(frame)) ||
      ((ret = fwrite(buffer, 1, size, fifo->file)) != size)) {
      WARN("write(%zu) (%u)", ret, info->stream);
      reset_fifo(fifo);
   }
}

static void
write_data(const struct frame_info *info, const void *buffer, const size_t size)
{
   // we need to protect our fifo structure, since games usually output audio on another thread and so
   static struct fifo fifo = { .fd = -1 };
   static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
   pthread_mutex_lock(&mutex);
   write_data_unsafe(&fifo, info, buffer, size);
   pthread_mutex_unlock(&mutex);
}

void
flip_pixels_if_needed(const GLint view[4], uint8_t *pixels, const uint32_t width, const uint32_t height, const uint8_t components)
{
   // Will detect at least wine which blits viewport sized framebuffer at the end already flipped
   if (!FLIP_VIDEO ||
       (LAST_FRAMEBUFFER_BLIT[0] == 0 && LAST_FRAMEBUFFER_BLIT[1] == 0 &&
        LAST_FRAMEBUFFER_BLIT[2] == view[2] && LAST_FRAMEBUFFER_BLIT[3] == view[3] &&
        LAST_FRAMEBUFFER_BLIT[4] == 0 && LAST_FRAMEBUFFER_BLIT[5] == view[3] &&
        LAST_FRAMEBUFFER_BLIT[6] == view[2] && LAST_FRAMEBUFFER_BLIT[7] == 0))
      return;

   // Sadly I can't come up with any reliable way to do this on GPU on all possible OpenGL versions and variants.
   // FIXME: This function however is quite expensive and causes capture to take more than 1ms easily.
   //        Should try dig deeper and see how I could make GPU do the flip without having to read twice.

   const uint32_t stride = width * components;
   static __thread struct buffer row;
   buffer_resize(&row, stride);

   for (uint8_t *lo = pixels, *hi = pixels + (height - 1) * stride; lo < hi; lo += stride, hi -= stride) {
      memcpy(row.data, lo, stride);
      memcpy(lo, hi, stride);
      memcpy(hi, row.data, stride);
   }
}

static void
capture_frame_pbo(struct gl *gl, const GLint view[4], const uint64_t ts)
{
   const struct {
      const char *video;
      GLenum format;
      uint8_t components;
   } frame = {
      // XXX: Maybe on ES we should instead modify the data and remove A component?
      //      Would save some transmission bandwidth at least (from GPU and to PIPE)
      //      RGB also is unaligned, but seem just as fast as RGBA on Nvidia.
      .video = (OPENGL_VARIANT == OPENGL_ES ? "rgb0" : "rgb"),
      .format = (OPENGL_VARIANT == OPENGL_ES ? GL_RGBA : GL_RGB),
      .components = (OPENGL_VARIANT == OPENGL_ES ? 4 : 3),
   };

   if (!glIsBuffer(gl->pbo[gl->active].obj)) {
      WARNX("create pbo %u", gl->active);
      glGenBuffers(1, &gl->pbo[gl->active].obj);
   }

   struct { GLenum t; GLint o; GLint v; } map[] = {
      { .t = GL_PACK_ALIGNMENT, .v = 1 },
      { .t = GL_PACK_ROW_LENGTH },
      { .t = GL_PACK_IMAGE_HEIGHT },
      { .t = GL_PACK_SKIP_PIXELS },
   };

   PROFILE(
   glBindBuffer(GL_PIXEL_PACK_BUFFER, gl->pbo[gl->active].obj);
   glBufferData(GL_PIXEL_PACK_BUFFER, view[2] * view[3] * frame.components, NULL, GL_STREAM_READ);

   for (size_t i = 0; i < ARRAY_SIZE(map); ++i) {
      glGetIntegerv(map[i].t, &map[i].o);
      glPixelStorei(map[i].t, map[i].v);
   }

   glReadPixels(view[0], view[1], view[2], view[3], frame.format, GL_UNSIGNED_BYTE, NULL);

   for (size_t i = 0; i < ARRAY_SIZE(map); ++i)
      glPixelStorei(map[i].t, map[i].o);

   gl->pbo[gl->active].ts = ts;
   gl->pbo[gl->active].width = view[2];
   gl->pbo[gl->active].height = view[3];
   gl->pbo[gl->active].written = true;
   , 1.0"read_frame");

   gl->active = (gl->active + 1) % NUM_PBOS;

   if (glIsBuffer(gl->pbo[gl->active].obj) && gl->pbo[gl->active].written) {
      const struct frame_info info = {
         .ts = gl->pbo[gl->active].ts,
         .stream = STREAM_VIDEO,
         .format = frame.video,
         .video.width = gl->pbo[gl->active].width,
         .video.height = gl->pbo[gl->active].height,
         .video.fps = FPS,
      };

      void *buf;
      const size_t size = info.video.width * info.video.height * frame.components;
      PROFILE(
      glBindBuffer(GL_PIXEL_PACK_BUFFER, gl->pbo[gl->active].obj);
      if ((buf = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, size, GL_MAP_READ_BIT))) {
         flip_pixels_if_needed(view, buf, info.video.width, info.video.height, frame.components);
         write_data(&info, buf, size);
         glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
         gl->pbo[gl->active].written = false;
      }
      , 2.0"write_frame");
   }
}

// Deletes every PBO of the ring and clears the capture state, so capturing starts over with fresh buffers.
// Frames that were read but not yet written are discarded.
static void
reset_capture(struct gl *gl)
{
   for (size_t i = 0; i < NUM_PBOS; ++i) {
      if (glIsBuffer(gl->pbo[i].obj))
         glDeleteBuffers(1, &gl->pbo[i].obj);
   }

   WARNX("capture reset");
   memset(gl->pbo, 0, sizeof(gl->pbo));
   gl->active = 0;
}

// Captures the current frame unless frame dropping is enabled and less than one frame interval (1 / FPS)
// has passed since the previous capture on this thread.
// The application's GL_PIXEL_PACK_BUFFER binding is saved before and restored after the capture.
static void
capture_frame(struct gl *gl, const GLint view[4])
{
   static __thread uint64_t previous_capture;
   const uint64_t now = get_time_ns();
   const uint64_t frame_interval = 1e9 / FPS;

   // previous_capture == 0 means nothing has been captured yet, the first frame is always taken
   const bool too_early = (previous_capture > 0 && now - previous_capture < frame_interval);

   if (DROP_FRAMES && too_early)
      return;

   previous_capture = now;

   GLint bound_pbo;
   glGetIntegerv(GL_PIXEL_PACK_BUFFER_BINDING, &bound_pbo);
   capture_frame_pbo(gl, view, now);
   glBindBuffer(GL_PIXEL_PACK_BUFFER, bound_pbo);
}

static void
draw_indicator(const GLint view[4])
{
   GLfloat clear[4];
   GLboolean scissor;
   glGetFloatv(GL_COLOR_CLEAR_VALUE, clear);
   glGetBooleanv(GL_SCISSOR_TEST, &scissor);

   if (!scissor)
      glEnable(GL_SCISSOR_TEST);

   const uint32_t size = (view[3] / 75 > 10 ? view[3] / 75 : 10);
   glScissor(size / 2 - 1, view[3] - size - size / 2 - 1, size + 2, size + 2);
   glClearColor(0.0f0.0f0.0f0.0f);
   glClear(GL_COLOR_BUFFER_BIT);
   glScissor(size / 2, view[3] - size - size / 2, size, size);
   glClearColor(1.0f0.0f0.0f0.0f);
   glClear(GL_COLOR_BUFFER_BIT);

   if (!scissor)
      glDisable(GL_SCISSOR_TEST);

   glClearColor(clear[0], clear[1], clear[2], clear[3]);
}

// Per frame entry point declared for hooks.h (presumably called from the intercepted buffer swap functions — confirm there).
// Captures the current viewport and then draws the recording indicator, in that order, so the indicator
// itself does not end up in the captured frame.
// The capture state is kept per thread. It is reset when glGetError() returns something different after the
// work than it did before.
static void
swap_buffers(void)
{
   void* (*procs[])(const char*) = {
      (void*)_eglGetProcAddress,
      (void*)_glXGetProcAddressARB,
      (void*)_glXGetProcAddress
   };

   load_gl_function_pointers(procs, ARRAY_SIZE(procs));

   PROFILE(
   GLint view[4] = {0};
   static __thread struct gl gl;
   const GLenum error0 = glGetError();
   glGetIntegerv(GL_VIEWPORT, view);
   PROFILE(capture_frame(&gl, view), 2.0, "capture_frame");
   PROFILE(draw_indicator(view), 0.5, "draw_indicator");

   if (error0 != glGetError()) {
      WARNX("glError occured");
      reset_capture(&gl);
   }
   , 2.0, "swap_buffers");
}

static const char*
alsa_get_format(const snd_pcm_format_t format)
{
   switch (format) {
      case SND_PCM_FORMAT_FLOAT64_LE: return "f64le";
      case SND_PCM_FORMAT_FLOAT64_BE: return "f64be";
      case SND_PCM_FORMAT_FLOAT_LE: return "f32le";
      case SND_PCM_FORMAT_FLOAT_BE: return "f32be";
      case SND_PCM_FORMAT_S32_LE: return "s32le";
      case SND_PCM_FORMAT_S32_BE: return "s32be";
      case SND_PCM_FORMAT_U32_LE: return "u32le";
      case SND_PCM_FORMAT_U32_BE: return "u32be";
      case SND_PCM_FORMAT_S24_LE: return "s24le";
      case SND_PCM_FORMAT_S24_BE: return "s24be";
      case SND_PCM_FORMAT_U24_LE: return "u24le";
      case SND_PCM_FORMAT_U24_BE: return "u24be";
      case SND_PCM_FORMAT_S16_LE: return "s16le";
      case SND_PCM_FORMAT_S16_BE: return "s16be";
      case SND_PCM_FORMAT_U16_LE: return "u16le";
      case SND_PCM_FORMAT_U16_BE: return "u16be";
      case SND_PCM_FORMAT_S8: return "s8";
      case SND_PCM_FORMAT_U8: return "u8";
      case SND_PCM_FORMAT_MU_LAW: return "mulaw";
      case SND_PCM_FORMAT_A_LAW: return "alaw";
      defaultbreak;
   }

   WARN_ONCE("can't convert alsa format: %u", format);
   return NULL;
}

// Fills `out_info` with the current hardware parameters of `pcm` (format, rate, channels) and the current time.
//
// pcm:      PCM handle to query
// out_info: receives the audio frame description; left untouched when the parameters can't be queried
// caller:   name of the intercepted function, only used for logging
//
// Returns true when the stream can be captured. Returns false when the hardware parameters can't be read
// or the sample format has no rawmux name.
static bool
alsa_get_frame_info(snd_pcm_t *pcm, struct frame_info *out_info, const char *caller)
{
   snd_pcm_format_t format;
   unsigned int channels, rate;
   snd_pcm_hw_params_t *params = alloca(snd_pcm_hw_params_sizeof());

   // Each of these returns a negative error code on failure and leaves its output unset in that case
   if (snd_pcm_hw_params_current(pcm, params) < 0 ||
       snd_pcm_hw_params_get_format(params, &format) < 0 ||
       snd_pcm_hw_params_get_channels(params, &channels) < 0 ||
       snd_pcm_hw_params_get_rate(params, &rate, NULL) < 0) {
      WARN_ONCE("%s: failed to query pcm hw params", caller);
      return false;
   }

   WARN_ONCE("%s (%s:%u:%u)", caller, snd_pcm_format_name(format), rate, channels);
   out_info->ts = get_time_ns();
   out_info->stream = STREAM_AUDIO;
   out_info->format = alsa_get_format(format);
   out_info->audio.rate = rate;
   out_info->audio.channels = channels;
   return (out_info->format != NULL);
}

// Audio entry point declared for hooks.h (presumably called from the intercepted ALSA write functions — confirm there).
// Forwards `size` frames of interleaved samples from `buffer` to the fifo as one audio packet.
//
// pcm:    PCM handle the application is writing to, used to look up the stream parameters
// buffer: sample data
// size:   number of frames in buffer
// caller: name of the intercepted function, only used for logging
static void
alsa_writei(snd_pcm_t *pcm, const void *buffer, const snd_pcm_uframes_t size, const char *caller)
{
   struct frame_info info;
   if (!alsa_get_frame_info(pcm, &info, caller))
      return;

   // A negative result is an error code and must not be turned into a (huge) size_t
   const ssize_t bytes = snd_pcm_frames_to_bytes(pcm, size);
   if (bytes < 0)
      return;

   PROFILE(write_data(&info, buffer, (size_t)bytes), 2.0, "alsa_write");
}

// Returns the monotonic time in nanoseconds with SPEED_HACK applied.
// The first call on a thread fixes that thread's base; later calls return base + elapsed * SPEED_HACK.
// With SPEED_HACK == 1.0 this is the same as get_time_ns().
static uint64_t
get_fake_time_ns(void)
{
   static __thread uint64_t base;
   const uint64_t current = get_time_ns();

   if (!base)
      base = current;

   // Only the elapsed time goes through floating point; the absolute timestamp stays an exact integer.
   // The signed intermediate keeps a negative multiplier well defined.
   const int64_t scaled = (int64_t)((double)(current - base) * SPEED_HACK);
   return base + (uint64_t)scaled;
}