| /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
 | |
|  * Permission is hereby granted, free of charge, to any person obtaining a copy
 | |
|  * of this software and associated documentation files (the "Software"), to
 | |
|  * deal in the Software without restriction, including without limitation the
 | |
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 | |
|  * sell copies of the Software, and to permit persons to whom the Software is
 | |
|  * furnished to do so, subject to the following conditions:
 | |
|  *
 | |
|  * The above copyright notice and this permission notice shall be included in
 | |
|  * all copies or substantial portions of the Software.
 | |
|  *
 | |
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | |
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | |
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | |
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | |
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 | |
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 | |
|  * IN THE SOFTWARE.
 | |
|  */
 | |
| 
 | |
| /* We lean on the fact that POLL{IN,OUT,ERR,HUP} correspond with their
 | |
|  * EPOLL* counterparts.  We use the POLL* variants in this file because that
 | |
|  * is what libuv uses elsewhere.
 | |
|  */
 | |
| 
 | |
| #include "uv.h"
 | |
| #include "internal.h"
 | |
| 
 | |
| #include <inttypes.h>
 | |
| #include <stdatomic.h>
 | |
| #include <stddef.h>  /* offsetof */
 | |
| #include <stdint.h>
 | |
| #include <stdio.h>
 | |
| #include <stdlib.h>
 | |
| #include <string.h>
 | |
| #include <assert.h>
 | |
| #include <errno.h>
 | |
| 
 | |
| #include <fcntl.h>
 | |
| #include <net/if.h>
 | |
| #include <sys/epoll.h>
 | |
| #include <sys/inotify.h>
 | |
| #include <sys/mman.h>
 | |
| #include <sys/param.h>
 | |
| #include <sys/prctl.h>
 | |
| #include <sys/stat.h>
 | |
| #include <sys/syscall.h>
 | |
| #include <sys/sysinfo.h>
 | |
| #include <sys/sysmacros.h>
 | |
| #include <sys/types.h>
 | |
| #include <sys/utsname.h>
 | |
| #include <time.h>
 | |
| #include <unistd.h>
 | |
| 
 | |
| #ifndef __NR_io_uring_setup
 | |
| # define __NR_io_uring_setup 425
 | |
| #endif
 | |
| 
 | |
| #ifndef __NR_io_uring_enter
 | |
| # define __NR_io_uring_enter 426
 | |
| #endif
 | |
| 
 | |
| #ifndef __NR_io_uring_register
 | |
| # define __NR_io_uring_register 427
 | |
| #endif
 | |
| 
 | |
| #ifndef __NR_copy_file_range
 | |
| # if defined(__x86_64__)
 | |
| #  define __NR_copy_file_range 326
 | |
| # elif defined(__i386__)
 | |
| #  define __NR_copy_file_range 377
 | |
| # elif defined(__s390__)
 | |
| #  define __NR_copy_file_range 375
 | |
| # elif defined(__arm__)
 | |
| #  define __NR_copy_file_range 391
 | |
| # elif defined(__aarch64__)
 | |
| #  define __NR_copy_file_range 285
 | |
| # elif defined(__powerpc__)
 | |
| #  define __NR_copy_file_range 379
 | |
| # elif defined(__arc__)
 | |
| #  define __NR_copy_file_range 285
 | |
| # endif
 | |
| #endif /* __NR_copy_file_range */
 | |
| 
 | |
| #ifndef __NR_statx
 | |
| # if defined(__x86_64__)
 | |
| #  define __NR_statx 332
 | |
| # elif defined(__i386__)
 | |
| #  define __NR_statx 383
 | |
| # elif defined(__aarch64__)
 | |
| #  define __NR_statx 397
 | |
| # elif defined(__arm__)
 | |
| #  define __NR_statx 397
 | |
| # elif defined(__ppc__)
 | |
| #  define __NR_statx 383
 | |
| # elif defined(__s390__)
 | |
| #  define __NR_statx 379
 | |
| # endif
 | |
| #endif /* __NR_statx */
 | |
| 
 | |
| #ifndef __NR_getrandom
 | |
| # if defined(__x86_64__)
 | |
| #  define __NR_getrandom 318
 | |
| # elif defined(__i386__)
 | |
| #  define __NR_getrandom 355
 | |
| # elif defined(__aarch64__)
 | |
| #  define __NR_getrandom 384
 | |
| # elif defined(__arm__)
 | |
| #  define __NR_getrandom 384
 | |
| # elif defined(__ppc__)
 | |
| #  define __NR_getrandom 359
 | |
| # elif defined(__s390__)
 | |
| #  define __NR_getrandom 349
 | |
| # endif
 | |
| #endif /* __NR_getrandom */
 | |
| 
 | |
| #define HAVE_IFADDRS_H 1
 | |
| 
 | |
#if defined(__ANDROID_API__) && __ANDROID_API__ < 24
 | |
| # undef HAVE_IFADDRS_H
 | |
| #endif
 | |
| 
 | |
| #ifdef __UCLIBC__
 | |
| # if __UCLIBC_MAJOR__ < 0 && __UCLIBC_MINOR__ < 9 && __UCLIBC_SUBLEVEL__ < 32
 | |
| #  undef HAVE_IFADDRS_H
 | |
| # endif
 | |
| #endif
 | |
| 
 | |
| #ifdef HAVE_IFADDRS_H
 | |
| # include <ifaddrs.h>
 | |
| # include <sys/socket.h>
 | |
| # include <net/ethernet.h>
 | |
| # include <netpacket/packet.h>
 | |
| #endif /* HAVE_IFADDRS_H */
 | |
| 
 | |
| enum {
 | |
|   UV__IORING_SETUP_SQPOLL = 2u,
 | |
| };
 | |
| 
 | |
| enum {
 | |
|   UV__IORING_FEAT_SINGLE_MMAP = 1u,
 | |
|   UV__IORING_FEAT_NODROP = 2u,
 | |
|   UV__IORING_FEAT_RSRC_TAGS = 1024u,  /* linux v5.13 */
 | |
| };
 | |
| 
 | |
| enum {
 | |
|   UV__IORING_OP_READV = 1,
 | |
|   UV__IORING_OP_WRITEV = 2,
 | |
|   UV__IORING_OP_FSYNC = 3,
 | |
|   UV__IORING_OP_OPENAT = 18,
 | |
|   UV__IORING_OP_CLOSE = 19,
 | |
|   UV__IORING_OP_STATX = 21,
 | |
|   UV__IORING_OP_EPOLL_CTL = 29,
 | |
|   UV__IORING_OP_RENAMEAT = 35,
 | |
|   UV__IORING_OP_UNLINKAT = 36,
 | |
|   UV__IORING_OP_MKDIRAT = 37,
 | |
|   UV__IORING_OP_SYMLINKAT = 38,
 | |
|   UV__IORING_OP_LINKAT = 39,
 | |
| };
 | |
| 
 | |
| enum {
 | |
|   UV__IORING_ENTER_GETEVENTS = 1u,
 | |
|   UV__IORING_ENTER_SQ_WAKEUP = 2u,
 | |
| };
 | |
| 
 | |
| enum {
 | |
|   UV__IORING_SQ_NEED_WAKEUP = 1u,
 | |
|   UV__IORING_SQ_CQ_OVERFLOW = 2u,
 | |
| };
 | |
| 
 | |
| enum {
 | |
|   UV__MKDIRAT_SYMLINKAT_LINKAT = 1u,
 | |
| };
 | |
| 
 | |
| struct uv__io_cqring_offsets {
 | |
|   uint32_t head;
 | |
|   uint32_t tail;
 | |
|   uint32_t ring_mask;
 | |
|   uint32_t ring_entries;
 | |
|   uint32_t overflow;
 | |
|   uint32_t cqes;
 | |
|   uint64_t reserved0;
 | |
|   uint64_t reserved1;
 | |
| };
 | |
| 
 | |
| STATIC_ASSERT(40 == sizeof(struct uv__io_cqring_offsets));
 | |
| 
 | |
| struct uv__io_sqring_offsets {
 | |
|   uint32_t head;
 | |
|   uint32_t tail;
 | |
|   uint32_t ring_mask;
 | |
|   uint32_t ring_entries;
 | |
|   uint32_t flags;
 | |
|   uint32_t dropped;
 | |
|   uint32_t array;
 | |
|   uint32_t reserved0;
 | |
|   uint64_t reserved1;
 | |
| };
 | |
| 
 | |
| STATIC_ASSERT(40 == sizeof(struct uv__io_sqring_offsets));
 | |
| 
 | |
| struct uv__io_uring_cqe {
 | |
|   uint64_t user_data;
 | |
|   int32_t res;
 | |
|   uint32_t flags;
 | |
| };
 | |
| 
 | |
| STATIC_ASSERT(16 == sizeof(struct uv__io_uring_cqe));
 | |
| 
 | |
| struct uv__io_uring_sqe {
 | |
|   uint8_t opcode;
 | |
|   uint8_t flags;
 | |
|   uint16_t ioprio;
 | |
|   int32_t fd;
 | |
|   union {
 | |
|     uint64_t off;
 | |
|     uint64_t addr2;
 | |
|   };
 | |
|   union {
 | |
|     uint64_t addr;
 | |
|   };
 | |
|   uint32_t len;
 | |
|   union {
 | |
|     uint32_t rw_flags;
 | |
|     uint32_t fsync_flags;
 | |
|     uint32_t open_flags;
 | |
|     uint32_t statx_flags;
 | |
|   };
 | |
|   uint64_t user_data;
 | |
|   union {
 | |
|     uint16_t buf_index;
 | |
|     uint64_t pad[3];
 | |
|   };
 | |
| };
 | |
| 
 | |
| STATIC_ASSERT(64 == sizeof(struct uv__io_uring_sqe));
 | |
| STATIC_ASSERT(0 == offsetof(struct uv__io_uring_sqe, opcode));
 | |
| STATIC_ASSERT(1 == offsetof(struct uv__io_uring_sqe, flags));
 | |
| STATIC_ASSERT(2 == offsetof(struct uv__io_uring_sqe, ioprio));
 | |
| STATIC_ASSERT(4 == offsetof(struct uv__io_uring_sqe, fd));
 | |
| STATIC_ASSERT(8 == offsetof(struct uv__io_uring_sqe, off));
 | |
| STATIC_ASSERT(16 == offsetof(struct uv__io_uring_sqe, addr));
 | |
| STATIC_ASSERT(24 == offsetof(struct uv__io_uring_sqe, len));
 | |
| STATIC_ASSERT(28 == offsetof(struct uv__io_uring_sqe, rw_flags));
 | |
| STATIC_ASSERT(32 == offsetof(struct uv__io_uring_sqe, user_data));
 | |
| STATIC_ASSERT(40 == offsetof(struct uv__io_uring_sqe, buf_index));
 | |
| 
 | |
| struct uv__io_uring_params {
 | |
|   uint32_t sq_entries;
 | |
|   uint32_t cq_entries;
 | |
|   uint32_t flags;
 | |
|   uint32_t sq_thread_cpu;
 | |
|   uint32_t sq_thread_idle;
 | |
|   uint32_t features;
 | |
|   uint32_t reserved[4];
 | |
|   struct uv__io_sqring_offsets sq_off;  /* 40 bytes */
 | |
|   struct uv__io_cqring_offsets cq_off;  /* 40 bytes */
 | |
| };
 | |
| 
 | |
| STATIC_ASSERT(40 + 40 + 40 == sizeof(struct uv__io_uring_params));
 | |
| STATIC_ASSERT(40 == offsetof(struct uv__io_uring_params, sq_off));
 | |
| STATIC_ASSERT(80 == offsetof(struct uv__io_uring_params, cq_off));
 | |
| 
 | |
| STATIC_ASSERT(EPOLL_CTL_ADD < 4);
 | |
| STATIC_ASSERT(EPOLL_CTL_DEL < 4);
 | |
| STATIC_ASSERT(EPOLL_CTL_MOD < 4);
 | |
| 
 | |
| struct watcher_list {
 | |
|   RB_ENTRY(watcher_list) entry;
 | |
|   struct uv__queue watchers;
 | |
|   int iterating;
 | |
|   char* path;
 | |
|   int wd;
 | |
| };
 | |
| 
 | |
| struct watcher_root {
 | |
|   struct watcher_list* rbh_root;
 | |
| };
 | |
| 
 | |
| static int uv__inotify_fork(uv_loop_t* loop, struct watcher_list* root);
 | |
| static void uv__inotify_read(uv_loop_t* loop,
 | |
|                              uv__io_t* w,
 | |
|                              unsigned int revents);
 | |
| static int compare_watchers(const struct watcher_list* a,
 | |
|                             const struct watcher_list* b);
 | |
| static void maybe_free_watcher_list(struct watcher_list* w,
 | |
|                                     uv_loop_t* loop);
 | |
| 
 | |
| static void uv__epoll_ctl_flush(int epollfd,
 | |
|                                 struct uv__iou* ctl,
 | |
|                                 struct epoll_event (*events)[256]);
 | |
| 
 | |
| static void uv__epoll_ctl_prep(int epollfd,
 | |
|                                struct uv__iou* ctl,
 | |
|                                struct epoll_event (*events)[256],
 | |
|                                int op,
 | |
|                                int fd,
 | |
|                                struct epoll_event* e);
 | |
| 
 | |
| RB_GENERATE_STATIC(watcher_root, watcher_list, entry, compare_watchers)
 | |
| 
 | |
| 
 | |
| static struct watcher_root* uv__inotify_watchers(uv_loop_t* loop) {
 | |
|   /* This cast works because watcher_root is a struct with a pointer as its
 | |
|    * sole member. Such type punning is unsafe in the presence of strict
 | |
|    * pointer aliasing (and is just plain nasty) but that is why libuv
 | |
|    * is compiled with -fno-strict-aliasing.
 | |
|    */
 | |
|   return (struct watcher_root*) &loop->inotify_watchers;
 | |
| }
 | |
| 
 | |
| 
 | |
| unsigned uv__kernel_version(void) {
 | |
|   static _Atomic unsigned cached_version;
 | |
|   struct utsname u;
 | |
|   unsigned version;
 | |
|   unsigned major;
 | |
|   unsigned minor;
 | |
|   unsigned patch;
 | |
| 
 | |
|   version = atomic_load_explicit(&cached_version, memory_order_relaxed);
 | |
|   if (version != 0)
 | |
|     return version;
 | |
| 
 | |
|   if (-1 == uname(&u))
 | |
|     return 0;
 | |
| 
 | |
|   if (3 != sscanf(u.release, "%u.%u.%u", &major, &minor, &patch))
 | |
|     return 0;
 | |
| 
 | |
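  /* Pack the version as 0xMMmmpp (e.g. 5.15.90 -> 0x050F5A) so callers can
   * compare it numerically against the hex constants used elsewhere in this
   * file.
   */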
|   version = major * 65536 + minor * 256 + patch;
 | |
|   atomic_store_explicit(&cached_version, version, memory_order_relaxed);
 | |
| 
 | |
|   return version;
 | |
| }
 | |
| 
 | |
| 
 | |
| ssize_t
 | |
| uv__fs_copy_file_range(int fd_in,
 | |
|                        off_t* off_in,
 | |
|                        int fd_out,
 | |
|                        off_t* off_out,
 | |
|                        size_t len,
 | |
|                        unsigned int flags)
 | |
| {
 | |
| #ifdef __NR_copy_file_range
 | |
|   return syscall(__NR_copy_file_range,
 | |
|                  fd_in,
 | |
|                  off_in,
 | |
|                  fd_out,
 | |
|                  off_out,
 | |
|                  len,
 | |
|                  flags);
 | |
| #else
 | |
|   return errno = ENOSYS, -1;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__statx(int dirfd,
 | |
|               const char* path,
 | |
|               int flags,
 | |
|               unsigned int mask,
 | |
|               struct uv__statx* statxbuf) {
 | |
| #if !defined(__NR_statx) || defined(__ANDROID_API__) && __ANDROID_API__ < 30
 | |
|   return errno = ENOSYS, -1;
 | |
| #else
 | |
|   int rc;
 | |
| 
 | |
|   rc = syscall(__NR_statx, dirfd, path, flags, mask, statxbuf);
 | |
|   if (rc >= 0)
 | |
|     uv__msan_unpoison(statxbuf, sizeof(*statxbuf));
 | |
| 
 | |
|   return rc;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| 
 | |
| ssize_t uv__getrandom(void* buf, size_t buflen, unsigned flags) {
 | |
| #if !defined(__NR_getrandom) || defined(__ANDROID_API__) && __ANDROID_API__ < 28
 | |
|   return errno = ENOSYS, -1;
 | |
| #else
 | |
|   ssize_t rc;
 | |
| 
 | |
|   rc = syscall(__NR_getrandom, buf, buflen, flags);
 | |
|   if (rc >= 0)
 | |
|     uv__msan_unpoison(buf, buflen);
 | |
| 
 | |
|   return rc;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__io_uring_setup(int entries, struct uv__io_uring_params* params) {
 | |
|   return syscall(__NR_io_uring_setup, entries, params);
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__io_uring_enter(int fd,
 | |
|                        unsigned to_submit,
 | |
|                        unsigned min_complete,
 | |
|                        unsigned flags) {
 | |
|   /* io_uring_enter used to take a sigset_t but it's unused
 | |
|    * in newer kernels unless IORING_ENTER_EXT_ARG is set,
 | |
|    * in which case it takes a struct io_uring_getevents_arg.
 | |
|    */
 | |
|   return syscall(__NR_io_uring_enter,
 | |
|                  fd,
 | |
|                  to_submit,
 | |
|                  min_complete,
 | |
|                  flags,
 | |
|                  NULL,
 | |
|                  0L);
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__io_uring_register(int fd, unsigned opcode, void* arg, unsigned nargs) {
 | |
|   return syscall(__NR_io_uring_register, fd, opcode, arg, nargs);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int uv__use_io_uring(void) {
 | |
| #if defined(__ANDROID_API__)
 | |
|   return 0;  /* Possibly available but blocked by seccomp. */
 | |
| #else
 | |
|   /* Ternary: unknown=0, yes=1, no=-1 */
 | |
|   static _Atomic int use_io_uring;
 | |
|   char* val;
 | |
|   int use;
 | |
| 
 | |
|   use = atomic_load_explicit(&use_io_uring, memory_order_relaxed);
 | |
| 
 | |
|   if (use == 0) {
 | |
|     val = getenv("UV_USE_IO_URING");
 | |
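    /* Enabled by default; setting UV_USE_IO_URING=0 opts out of io_uring. */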
|     use = val == NULL || atoi(val) ? 1 : -1;
 | |
|     atomic_store_explicit(&use_io_uring, use, memory_order_relaxed);
 | |
|   }
 | |
| 
 | |
|   return use > 0;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| 
 | |
| static void uv__iou_init(int epollfd,
 | |
|                          struct uv__iou* iou,
 | |
|                          uint32_t entries,
 | |
|                          uint32_t flags) {
 | |
|   struct uv__io_uring_params params;
 | |
|   struct epoll_event e;
 | |
|   size_t cqlen;
 | |
|   size_t sqlen;
 | |
|   size_t maxlen;
 | |
|   size_t sqelen;
 | |
|   uint32_t i;
 | |
|   char* sq;
 | |
|   char* sqe;
 | |
|   int ringfd;
 | |
| 
 | |
|   sq = MAP_FAILED;
 | |
|   sqe = MAP_FAILED;
 | |
| 
 | |
|   if (!uv__use_io_uring())
 | |
|     return;
 | |
| 
 | |
|   /* SQPOLL required CAP_SYS_NICE until linux v5.12 relaxed that requirement.
 | |
|    * Mostly academic because we check for a v5.13 kernel afterwards anyway.
 | |
|    */
 | |
  memset(&params, 0, sizeof(params));
 | |
|   params.flags = flags;
 | |
| 
 | |
|   if (flags & UV__IORING_SETUP_SQPOLL)
 | |
|     params.sq_thread_idle = 10;  /* milliseconds */
 | |
| 
 | |
|   /* Kernel returns a file descriptor with O_CLOEXEC flag set. */
 | |
  ringfd = uv__io_uring_setup(entries, &params);
 | |
|   if (ringfd == -1)
 | |
|     return;
 | |
| 
 | |
|   /* IORING_FEAT_RSRC_TAGS is used to detect linux v5.13 but what we're
 | |
|    * actually detecting is whether IORING_OP_STATX works with SQPOLL.
 | |
|    */
 | |
|   if (!(params.features & UV__IORING_FEAT_RSRC_TAGS))
 | |
|     goto fail;
 | |
| 
 | |
|   /* Implied by IORING_FEAT_RSRC_TAGS but checked explicitly anyway. */
 | |
|   if (!(params.features & UV__IORING_FEAT_SINGLE_MMAP))
 | |
|     goto fail;
 | |
| 
 | |
|   /* Implied by IORING_FEAT_RSRC_TAGS but checked explicitly anyway. */
 | |
|   if (!(params.features & UV__IORING_FEAT_NODROP))
 | |
|     goto fail;
 | |
| 
 | |
|   sqlen = params.sq_off.array + params.sq_entries * sizeof(uint32_t);
 | |
|   cqlen =
 | |
|       params.cq_off.cqes + params.cq_entries * sizeof(struct uv__io_uring_cqe);
 | |
|   maxlen = sqlen < cqlen ? cqlen : sqlen;
 | |
|   sqelen = params.sq_entries * sizeof(struct uv__io_uring_sqe);
 | |
| 
 | |
|   sq = mmap(0,
 | |
|             maxlen,
 | |
|             PROT_READ | PROT_WRITE,
 | |
|             MAP_SHARED | MAP_POPULATE,
 | |
|             ringfd,
 | |
|             0);  /* IORING_OFF_SQ_RING */
 | |
| 
 | |
|   sqe = mmap(0,
 | |
|              sqelen,
 | |
|              PROT_READ | PROT_WRITE,
 | |
|              MAP_SHARED | MAP_POPULATE,
 | |
|              ringfd,
 | |
|              0x10000000ull);  /* IORING_OFF_SQES */
 | |
| 
 | |
|   if (sq == MAP_FAILED || sqe == MAP_FAILED)
 | |
|     goto fail;
 | |
| 
 | |
|   if (flags & UV__IORING_SETUP_SQPOLL) {
 | |
|     /* Only interested in completion events. To get notified when
 | |
|      * the kernel pulls items from the submission ring, add POLLOUT.
 | |
|      */
 | |
|     memset(&e, 0, sizeof(e));
 | |
|     e.events = POLLIN;
 | |
|     e.data.fd = ringfd;
 | |
| 
 | |
|     if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ringfd, &e))
 | |
|       goto fail;
 | |
|   }
 | |
| 
 | |
|   iou->sqhead = (uint32_t*) (sq + params.sq_off.head);
 | |
|   iou->sqtail = (uint32_t*) (sq + params.sq_off.tail);
 | |
|   iou->sqmask = *(uint32_t*) (sq + params.sq_off.ring_mask);
 | |
|   iou->sqarray = (uint32_t*) (sq + params.sq_off.array);
 | |
|   iou->sqflags = (uint32_t*) (sq + params.sq_off.flags);
 | |
|   iou->cqhead = (uint32_t*) (sq + params.cq_off.head);
 | |
|   iou->cqtail = (uint32_t*) (sq + params.cq_off.tail);
 | |
|   iou->cqmask = *(uint32_t*) (sq + params.cq_off.ring_mask);
 | |
|   iou->sq = sq;
 | |
|   iou->cqe = sq + params.cq_off.cqes;
 | |
|   iou->sqe = sqe;
 | |
|   iou->sqlen = sqlen;
 | |
|   iou->cqlen = cqlen;
 | |
|   iou->maxlen = maxlen;
 | |
|   iou->sqelen = sqelen;
 | |
|   iou->ringfd = ringfd;
 | |
|   iou->in_flight = 0;
 | |
|   iou->flags = 0;
 | |
| 
 | |
|   if (uv__kernel_version() >= /* 5.15.0 */ 0x050F00)
 | |
|     iou->flags |= UV__MKDIRAT_SYMLINKAT_LINKAT;
 | |
| 
 | |
|   for (i = 0; i <= iou->sqmask; i++)
 | |
|     iou->sqarray[i] = i;  /* Slot -> sqe identity mapping. */
 | |
| 
 | |
|   return;
 | |
| 
 | |
| fail:
 | |
|   if (sq != MAP_FAILED)
 | |
|     munmap(sq, maxlen);
 | |
| 
 | |
|   if (sqe != MAP_FAILED)
 | |
|     munmap(sqe, sqelen);
 | |
| 
 | |
|   uv__close(ringfd);
 | |
| }
 | |
| 
 | |
| 
 | |
| static void uv__iou_delete(struct uv__iou* iou) {
 | |
|   if (iou->ringfd != -1) {
 | |
|     munmap(iou->sq, iou->maxlen);
 | |
|     munmap(iou->sqe, iou->sqelen);
 | |
|     uv__close(iou->ringfd);
 | |
|     iou->ringfd = -1;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__platform_loop_init(uv_loop_t* loop) {
 | |
|   uv__loop_internal_fields_t* lfields;
 | |
| 
 | |
|   lfields = uv__get_internal_fields(loop);
 | |
|   lfields->ctl.ringfd = -1;
 | |
|   lfields->iou.ringfd = -1;
 | |
| 
 | |
|   loop->inotify_watchers = NULL;
 | |
|   loop->inotify_fd = -1;
 | |
|   loop->backend_fd = epoll_create1(O_CLOEXEC);
 | |
| 
 | |
|   if (loop->backend_fd == -1)
 | |
|     return UV__ERR(errno);
 | |
| 
 | |
|   uv__iou_init(loop->backend_fd, &lfields->iou, 64, UV__IORING_SETUP_SQPOLL);
 | |
|   uv__iou_init(loop->backend_fd, &lfields->ctl, 256, 0);
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__io_fork(uv_loop_t* loop) {
 | |
|   int err;
 | |
|   struct watcher_list* root;
 | |
| 
 | |
|   root = uv__inotify_watchers(loop)->rbh_root;
 | |
| 
 | |
|   uv__close(loop->backend_fd);
 | |
|   loop->backend_fd = -1;
 | |
| 
 | |
|   /* TODO(bnoordhuis) Loses items from the submission and completion rings. */
 | |
|   uv__platform_loop_delete(loop);
 | |
| 
 | |
|   err = uv__platform_loop_init(loop);
 | |
|   if (err)
 | |
|     return err;
 | |
| 
 | |
|   return uv__inotify_fork(loop, root);
 | |
| }
 | |
| 
 | |
| 
 | |
| void uv__platform_loop_delete(uv_loop_t* loop) {
 | |
|   uv__loop_internal_fields_t* lfields;
 | |
| 
 | |
|   lfields = uv__get_internal_fields(loop);
 | |
|   uv__iou_delete(&lfields->ctl);
 | |
|   uv__iou_delete(&lfields->iou);
 | |
| 
 | |
|   if (loop->inotify_fd != -1) {
 | |
|     uv__io_stop(loop, &loop->inotify_read_watcher, POLLIN);
 | |
|     uv__close(loop->inotify_fd);
 | |
|     loop->inotify_fd = -1;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| struct uv__invalidate {
 | |
|   struct epoll_event (*prep)[256];
 | |
|   struct epoll_event* events;
 | |
|   int nfds;
 | |
| };
 | |
| 
 | |
| 
 | |
| void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
 | |
|   uv__loop_internal_fields_t* lfields;
 | |
|   struct uv__invalidate* inv;
 | |
|   struct epoll_event dummy;
 | |
|   int i;
 | |
| 
 | |
|   lfields = uv__get_internal_fields(loop);
 | |
|   inv = lfields->inv;
 | |
| 
 | |
|   /* Invalidate events with same file descriptor */
 | |
|   if (inv != NULL)
 | |
|     for (i = 0; i < inv->nfds; i++)
 | |
|       if (inv->events[i].data.fd == fd)
 | |
|         inv->events[i].data.fd = -1;
 | |
| 
 | |
|   /* Remove the file descriptor from the epoll.
 | |
|    * This avoids a problem where the same file description remains open
 | |
|    * in another process, causing repeated junk epoll events.
 | |
|    *
 | |
|    * We pass in a dummy epoll_event, to work around a bug in old kernels.
 | |
|    *
 | |
|    * Work around a bug in kernels 3.10 to 3.19 where passing a struct that
 | |
|    * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings.
 | |
|    */
 | |
|   memset(&dummy, 0, sizeof(dummy));
 | |
| 
 | |
|   if (inv == NULL) {
 | |
|     epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy);
 | |
|   } else {
 | |
|     uv__epoll_ctl_prep(loop->backend_fd,
 | |
|                        &lfields->ctl,
 | |
|                        inv->prep,
 | |
|                        EPOLL_CTL_DEL,
 | |
|                        fd,
 | |
|                        &dummy);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__io_check_fd(uv_loop_t* loop, int fd) {
 | |
|   struct epoll_event e;
 | |
|   int rc;
 | |
| 
 | |
|   memset(&e, 0, sizeof(e));
 | |
|   e.events = POLLIN;
 | |
|   e.data.fd = -1;
 | |
| 
 | |
|   rc = 0;
 | |
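  /* Probe by adding the fd: epoll rejects fd types it cannot watch
   * (e.g. regular files) with EPERM, which is surfaced to the caller.
   */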
|   if (epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e))
 | |
|     if (errno != EEXIST)
 | |
|       rc = UV__ERR(errno);
 | |
| 
 | |
|   if (rc == 0)
 | |
|     if (epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e))
 | |
|       abort();
 | |
| 
 | |
|   return rc;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* Caller must initialize SQE and call uv__iou_submit(). */
 | |
| static struct uv__io_uring_sqe* uv__iou_get_sqe(struct uv__iou* iou,
 | |
|                                                 uv_loop_t* loop,
 | |
|                                                 uv_fs_t* req) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   uint32_t head;
 | |
|   uint32_t tail;
 | |
|   uint32_t mask;
 | |
|   uint32_t slot;
 | |
| 
 | |
|   if (iou->ringfd == -1)
 | |
|     return NULL;
 | |
| 
 | |
|   head = atomic_load_explicit((_Atomic uint32_t*) iou->sqhead,
 | |
|                               memory_order_acquire);
 | |
|   tail = *iou->sqtail;
 | |
|   mask = iou->sqmask;
 | |
| 
 | |
|   if ((head & mask) == ((tail + 1) & mask))
 | |
|     return NULL;  /* No room in ring buffer. TODO(bnoordhuis) maybe flush it? */
 | |
| 
 | |
|   slot = tail & mask;
 | |
|   sqe = iou->sqe;
 | |
|   sqe = &sqe[slot];
 | |
|   memset(sqe, 0, sizeof(*sqe));
 | |
|   sqe->user_data = (uintptr_t) req;
 | |
| 
 | |
|   /* Pacify uv_cancel(). */
 | |
|   req->work_req.loop = loop;
 | |
|   req->work_req.work = NULL;
 | |
|   req->work_req.done = NULL;
 | |
|   uv__queue_init(&req->work_req.wq);
 | |
| 
 | |
|   uv__req_register(loop, req);
 | |
|   iou->in_flight++;
 | |
| 
 | |
|   return sqe;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void uv__iou_submit(struct uv__iou* iou) {
 | |
|   uint32_t flags;
 | |
| 
 | |
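  /* Publish the new tail; the release store makes the SQE contents visible
   * to the kernel before it observes the updated tail.
   */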
|   atomic_store_explicit((_Atomic uint32_t*) iou->sqtail,
 | |
|                         *iou->sqtail + 1,
 | |
|                         memory_order_release);
 | |
| 
 | |
|   flags = atomic_load_explicit((_Atomic uint32_t*) iou->sqflags,
 | |
|                                memory_order_acquire);
 | |
| 
 | |
|   if (flags & UV__IORING_SQ_NEED_WAKEUP)
 | |
|     if (uv__io_uring_enter(iou->ringfd, 0, 0, UV__IORING_ENTER_SQ_WAKEUP))
 | |
|       if (errno != EOWNERDEAD)  /* Kernel bug. Harmless, ignore. */
 | |
|         perror("libuv: io_uring_enter(wakeup)");  /* Can't happen. */
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_close(uv_loop_t* loop, uv_fs_t* req) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
|   /* Work around a poorly understood bug in older kernels where closing a file
 | |
|    * descriptor pointing to /foo/bar results in ETXTBSY errors when trying to
 | |
|    * execve("/foo/bar") later on. The bug seems to have been fixed somewhere
 | |
|    * between 5.15.85 and 5.15.90. I couldn't pinpoint the responsible commit
 | |
|    * but good candidates are the several data race fixes. Interestingly, it
 | |
|    * seems to manifest only when running under Docker so the possibility of
 | |
|    * a Docker bug can't be completely ruled out either. Yay, computers.
 | |
|    */
 | |
|   if (uv__kernel_version() < /* 5.15.90 */ 0x050F5A)
 | |
|     return 0;
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   sqe->fd = req->file;
 | |
|   sqe->opcode = UV__IORING_OP_CLOSE;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_fsync_or_fdatasync(uv_loop_t* loop,
 | |
|                                   uv_fs_t* req,
 | |
|                                   uint32_t fsync_flags) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL)
 | |
|     return 0;
 | |
| 
 | |
  /* Little known fact: setting sqe->off and sqe->len turns
 | |
|    * it into an asynchronous sync_file_range() operation.
 | |
|    */
 | |
|   sqe->fd = req->file;
 | |
|   sqe->fsync_flags = fsync_flags;
 | |
|   sqe->opcode = UV__IORING_OP_FSYNC;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_link(uv_loop_t* loop, uv_fs_t* req) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   if (!(iou->flags & UV__MKDIRAT_SYMLINKAT_LINKAT))
 | |
|     return 0;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   sqe->addr = (uintptr_t) req->path;
 | |
|   sqe->fd = AT_FDCWD;
 | |
|   sqe->addr2 = (uintptr_t) req->new_path;
 | |
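  /* For LINKAT (and RENAMEAT below) the len field carries the new_dirfd. */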
|   sqe->len = AT_FDCWD;
 | |
|   sqe->opcode = UV__IORING_OP_LINKAT;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_mkdir(uv_loop_t* loop, uv_fs_t* req) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   if (!(iou->flags & UV__MKDIRAT_SYMLINKAT_LINKAT))
 | |
|     return 0;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   sqe->addr = (uintptr_t) req->path;
 | |
|   sqe->fd = AT_FDCWD;
 | |
|   sqe->len = req->mode;
 | |
|   sqe->opcode = UV__IORING_OP_MKDIRAT;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_open(uv_loop_t* loop, uv_fs_t* req) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   sqe->addr = (uintptr_t) req->path;
 | |
|   sqe->fd = AT_FDCWD;
 | |
|   sqe->len = req->mode;
 | |
|   sqe->opcode = UV__IORING_OP_OPENAT;
 | |
|   sqe->open_flags = req->flags | O_CLOEXEC;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_rename(uv_loop_t* loop, uv_fs_t* req) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   sqe->addr = (uintptr_t) req->path;
 | |
|   sqe->fd = AT_FDCWD;
 | |
|   sqe->addr2 = (uintptr_t) req->new_path;
 | |
|   sqe->len = AT_FDCWD;
 | |
|   sqe->opcode = UV__IORING_OP_RENAMEAT;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_symlink(uv_loop_t* loop, uv_fs_t* req) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   if (!(iou->flags & UV__MKDIRAT_SYMLINKAT_LINKAT))
 | |
|     return 0;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   sqe->addr = (uintptr_t) req->path;
 | |
|   sqe->fd = AT_FDCWD;
 | |
|   sqe->addr2 = (uintptr_t) req->new_path;
 | |
|   sqe->opcode = UV__IORING_OP_SYMLINKAT;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_unlink(uv_loop_t* loop, uv_fs_t* req) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   sqe->addr = (uintptr_t) req->path;
 | |
|   sqe->fd = AT_FDCWD;
 | |
|   sqe->opcode = UV__IORING_OP_UNLINKAT;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_read_or_write(uv_loop_t* loop,
 | |
|                              uv_fs_t* req,
 | |
|                              int is_read) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
  /* If iovcnt is greater than IOV_MAX, cap it to IOV_MAX on reads and fall
   * back to the threadpool on writes. */
 | |
|   if (req->nbufs > IOV_MAX) {
 | |
|     if (is_read)
 | |
|       req->nbufs = IOV_MAX;
 | |
|     else
 | |
|       return 0;
 | |
|   }
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   sqe->addr = (uintptr_t) req->bufs;
 | |
|   sqe->fd = req->file;
 | |
|   sqe->len = req->nbufs;
 | |
|   sqe->off = req->off < 0 ? -1 : req->off;
 | |
|   sqe->opcode = is_read ? UV__IORING_OP_READV : UV__IORING_OP_WRITEV;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv__iou_fs_statx(uv_loop_t* loop,
 | |
|                      uv_fs_t* req,
 | |
|                      int is_fstat,
 | |
|                      int is_lstat) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct uv__statx* statxbuf;
 | |
|   struct uv__iou* iou;
 | |
| 
 | |
|   statxbuf = uv__malloc(sizeof(*statxbuf));
 | |
|   if (statxbuf == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   iou = &uv__get_internal_fields(loop)->iou;
 | |
| 
 | |
|   sqe = uv__iou_get_sqe(iou, loop, req);
 | |
|   if (sqe == NULL) {
 | |
|     uv__free(statxbuf);
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   req->ptr = statxbuf;
 | |
| 
 | |
|   sqe->addr = (uintptr_t) req->path;
 | |
|   sqe->addr2 = (uintptr_t) statxbuf;
 | |
|   sqe->fd = AT_FDCWD;
 | |
|   sqe->len = 0xFFF; /* STATX_BASIC_STATS + STATX_BTIME */
 | |
|   sqe->opcode = UV__IORING_OP_STATX;
 | |
| 
 | |
|   if (is_fstat) {
 | |
|     sqe->addr = (uintptr_t) "";
 | |
|     sqe->fd = req->file;
 | |
|     sqe->statx_flags |= 0x1000; /* AT_EMPTY_PATH */
 | |
|   }
 | |
| 
 | |
|   if (is_lstat)
 | |
|     sqe->statx_flags |= AT_SYMLINK_NOFOLLOW;
 | |
| 
 | |
|   uv__iou_submit(iou);
 | |
| 
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| void uv__statx_to_stat(const struct uv__statx* statxbuf, uv_stat_t* buf) {
 | |
|   buf->st_dev = makedev(statxbuf->stx_dev_major, statxbuf->stx_dev_minor);
 | |
|   buf->st_mode = statxbuf->stx_mode;
 | |
|   buf->st_nlink = statxbuf->stx_nlink;
 | |
|   buf->st_uid = statxbuf->stx_uid;
 | |
|   buf->st_gid = statxbuf->stx_gid;
 | |
|   buf->st_rdev = makedev(statxbuf->stx_rdev_major, statxbuf->stx_rdev_minor);
 | |
|   buf->st_ino = statxbuf->stx_ino;
 | |
|   buf->st_size = statxbuf->stx_size;
 | |
|   buf->st_blksize = statxbuf->stx_blksize;
 | |
|   buf->st_blocks = statxbuf->stx_blocks;
 | |
|   buf->st_atim.tv_sec = statxbuf->stx_atime.tv_sec;
 | |
|   buf->st_atim.tv_nsec = statxbuf->stx_atime.tv_nsec;
 | |
|   buf->st_mtim.tv_sec = statxbuf->stx_mtime.tv_sec;
 | |
|   buf->st_mtim.tv_nsec = statxbuf->stx_mtime.tv_nsec;
 | |
|   buf->st_ctim.tv_sec = statxbuf->stx_ctime.tv_sec;
 | |
|   buf->st_ctim.tv_nsec = statxbuf->stx_ctime.tv_nsec;
 | |
|   buf->st_birthtim.tv_sec = statxbuf->stx_btime.tv_sec;
 | |
|   buf->st_birthtim.tv_nsec = statxbuf->stx_btime.tv_nsec;
 | |
|   buf->st_flags = 0;
 | |
|   buf->st_gen = 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void uv__iou_fs_statx_post(uv_fs_t* req) {
 | |
|   struct uv__statx* statxbuf;
 | |
|   uv_stat_t* buf;
 | |
| 
 | |
|   buf = &req->statbuf;
 | |
|   statxbuf = req->ptr;
 | |
|   req->ptr = NULL;
 | |
| 
 | |
|   if (req->result == 0) {
 | |
|     uv__msan_unpoison(statxbuf, sizeof(*statxbuf));
 | |
|     uv__statx_to_stat(statxbuf, buf);
 | |
|     req->ptr = buf;
 | |
|   }
 | |
| 
 | |
|   uv__free(statxbuf);
 | |
| }
 | |
| 
 | |
| 
 | |
| static void uv__poll_io_uring(uv_loop_t* loop, struct uv__iou* iou) {
 | |
|   struct uv__io_uring_cqe* cqe;
 | |
|   struct uv__io_uring_cqe* e;
 | |
|   uv_fs_t* req;
 | |
|   uint32_t head;
 | |
|   uint32_t tail;
 | |
|   uint32_t mask;
 | |
|   uint32_t i;
 | |
|   uint32_t flags;
 | |
|   int nevents;
 | |
|   int rc;
 | |
| 
 | |
|   head = *iou->cqhead;
 | |
|   tail = atomic_load_explicit((_Atomic uint32_t*) iou->cqtail,
 | |
|                               memory_order_acquire);
 | |
|   mask = iou->cqmask;
 | |
|   cqe = iou->cqe;
 | |
|   nevents = 0;
 | |
| 
 | |
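  /* head and tail are free-running counters; mask each index to stay inside
   * the completion ring.
   */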
|   for (i = head; i != tail; i++) {
 | |
|     e = &cqe[i & mask];
 | |
| 
 | |
|     req = (uv_fs_t*) (uintptr_t) e->user_data;
 | |
|     assert(req->type == UV_FS);
 | |
| 
 | |
|     uv__req_unregister(loop, req);
 | |
|     iou->in_flight--;
 | |
| 
 | |
|     /* io_uring stores error codes as negative numbers, same as libuv. */
 | |
|     req->result = e->res;
 | |
| 
 | |
|     switch (req->fs_type) {
 | |
|       case UV_FS_FSTAT:
 | |
|       case UV_FS_LSTAT:
 | |
|       case UV_FS_STAT:
 | |
|         uv__iou_fs_statx_post(req);
 | |
|         break;
 | |
|       default:  /* Squelch -Wswitch warnings. */
 | |
|         break;
 | |
|     }
 | |
| 
 | |
|     uv__metrics_update_idle_time(loop);
 | |
|     req->cb(req);
 | |
|     nevents++;
 | |
|   }
 | |
| 
 | |
|   atomic_store_explicit((_Atomic uint32_t*) iou->cqhead,
 | |
|                         tail,
 | |
|                         memory_order_release);
 | |
| 
 | |
  /* Check whether CQEs overflowed; if so, enter the kernel to make them
 | |
|    * available. Don't grab them immediately but in the next loop iteration to
 | |
|    * avoid loop starvation. */
 | |
|   flags = atomic_load_explicit((_Atomic uint32_t*) iou->sqflags,
 | |
|                                memory_order_acquire);
 | |
| 
 | |
|   if (flags & UV__IORING_SQ_CQ_OVERFLOW) {
 | |
|     do
 | |
|       rc = uv__io_uring_enter(iou->ringfd, 0, 0, UV__IORING_ENTER_GETEVENTS);
 | |
|     while (rc == -1 && errno == EINTR);
 | |
| 
 | |
|     if (rc < 0)
 | |
|       perror("libuv: io_uring_enter(getevents)");  /* Can't happen. */
 | |
|   }
 | |
| 
 | |
|   uv__metrics_inc_events(loop, nevents);
 | |
|   if (uv__get_internal_fields(loop)->current_timeout == 0)
 | |
|     uv__metrics_inc_events_waiting(loop, nevents);
 | |
| }
 | |
| 
 | |
| 
 | |
| static void uv__epoll_ctl_prep(int epollfd,
 | |
|                                struct uv__iou* ctl,
 | |
|                                struct epoll_event (*events)[256],
 | |
|                                int op,
 | |
|                                int fd,
 | |
|                                struct epoll_event* e) {
 | |
|   struct uv__io_uring_sqe* sqe;
 | |
|   struct epoll_event* pe;
 | |
|   uint32_t mask;
 | |
|   uint32_t slot;
 | |
| 
 | |
|   if (ctl->ringfd == -1) {
 | |
|     if (!epoll_ctl(epollfd, op, fd, e))
 | |
|       return;
 | |
| 
 | |
|     if (op == EPOLL_CTL_DEL)
 | |
|       return;  /* Ignore errors, may be racing with another thread. */
 | |
| 
 | |
|     if (op != EPOLL_CTL_ADD)
 | |
|       abort();
 | |
| 
 | |
|     if (errno != EEXIST)
 | |
|       abort();
 | |
| 
 | |
|     /* File descriptor that's been watched before, update event mask. */
 | |
|     if (!epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, e))
 | |
|       return;
 | |
| 
 | |
|     abort();
 | |
|   } else {
 | |
|     mask = ctl->sqmask;
 | |
|     slot = (*ctl->sqtail)++ & mask;
 | |
| 
 | |
|     pe = &(*events)[slot];
 | |
|     *pe = *e;
 | |
| 
 | |
|     sqe = ctl->sqe;
 | |
|     sqe = &sqe[slot];
 | |
| 
 | |
|     memset(sqe, 0, sizeof(*sqe));
 | |
|     sqe->addr = (uintptr_t) pe;
 | |
|     sqe->fd = epollfd;
 | |
|     sqe->len = op;
 | |
|     sqe->off = fd;
 | |
|     sqe->opcode = UV__IORING_OP_EPOLL_CTL;
 | |
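    /* Pack op into bits 0-1, the prep-array slot into bits 2-9 and the fd
     * into the upper 32 bits; uv__epoll_ctl_flush() unpacks them again.
     */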
|     sqe->user_data = op | slot << 2 | (int64_t) fd << 32;
 | |
| 
 | |
|     if ((*ctl->sqhead & mask) == (*ctl->sqtail & mask))
 | |
|       uv__epoll_ctl_flush(epollfd, ctl, events);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void uv__epoll_ctl_flush(int epollfd,
 | |
|                                 struct uv__iou* ctl,
 | |
|                                 struct epoll_event (*events)[256]) {
 | |
|   struct epoll_event oldevents[256];
 | |
|   struct uv__io_uring_cqe* cqe;
 | |
|   uint32_t oldslot;
 | |
|   uint32_t slot;
 | |
|   uint32_t n;
 | |
|   int fd;
 | |
|   int op;
 | |
|   int rc;
 | |
| 
 | |
|   STATIC_ASSERT(sizeof(oldevents) == sizeof(*events));
 | |
|   assert(ctl->ringfd != -1);
 | |
|   assert(*ctl->sqhead != *ctl->sqtail);
 | |
| 
 | |
|   n = *ctl->sqtail - *ctl->sqhead;
 | |
|   do
 | |
|     rc = uv__io_uring_enter(ctl->ringfd, n, n, UV__IORING_ENTER_GETEVENTS);
 | |
|   while (rc == -1 && errno == EINTR);
 | |
| 
 | |
|   if (rc < 0)
 | |
|     perror("libuv: io_uring_enter(getevents)");  /* Can't happen. */
 | |
| 
 | |
|   if (rc != (int) n)
 | |
|     abort();
 | |
| 
 | |
|   assert(*ctl->sqhead == *ctl->sqtail);
 | |
| 
 | |
|   memcpy(oldevents, *events, sizeof(*events));
 | |
| 
 | |
|   /* Failed submissions are either EPOLL_CTL_DEL commands for file descriptors
 | |
|    * that have been closed, or EPOLL_CTL_ADD commands for file descriptors
 | |
|    * that we are already watching. Ignore the former and retry the latter
 | |
|    * with EPOLL_CTL_MOD.
 | |
|    */
 | |
|   while (*ctl->cqhead != *ctl->cqtail) {
 | |
|     slot = (*ctl->cqhead)++ & ctl->cqmask;
 | |
| 
 | |
|     cqe = ctl->cqe;
 | |
|     cqe = &cqe[slot];
 | |
| 
 | |
|     if (cqe->res == 0)
 | |
|       continue;
 | |
| 
 | |
|     fd = cqe->user_data >> 32;
 | |
|     op = 3 & cqe->user_data;
 | |
|     oldslot = 255 & (cqe->user_data >> 2);
 | |
| 
 | |
|     if (op == EPOLL_CTL_DEL)
 | |
|       continue;
 | |
| 
 | |
|     if (op != EPOLL_CTL_ADD)
 | |
|       abort();
 | |
| 
 | |
|     if (cqe->res != -EEXIST)
 | |
|       abort();
 | |
| 
 | |
|     uv__epoll_ctl_prep(epollfd,
 | |
|                        ctl,
 | |
|                        events,
 | |
|                        EPOLL_CTL_MOD,
 | |
|                        fd,
 | |
|                        &oldevents[oldslot]);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| void uv__io_poll(uv_loop_t* loop, int timeout) {
 | |
|   uv__loop_internal_fields_t* lfields;
 | |
|   struct epoll_event events[1024];
 | |
|   struct epoll_event prep[256];
 | |
|   struct uv__invalidate inv;
 | |
|   struct epoll_event* pe;
 | |
|   struct epoll_event e;
 | |
|   struct uv__iou* ctl;
 | |
|   struct uv__iou* iou;
 | |
|   int real_timeout;
 | |
|   struct uv__queue* q;
 | |
|   uv__io_t* w;
 | |
|   sigset_t* sigmask;
 | |
|   sigset_t sigset;
 | |
|   uint64_t base;
 | |
|   int have_iou_events;
 | |
|   int have_signals;
 | |
|   int nevents;
 | |
|   int epollfd;
 | |
|   int count;
 | |
|   int nfds;
 | |
|   int fd;
 | |
|   int op;
 | |
|   int i;
 | |
|   int user_timeout;
 | |
|   int reset_timeout;
 | |
| 
 | |
|   lfields = uv__get_internal_fields(loop);
 | |
|   ctl = &lfields->ctl;
 | |
|   iou = &lfields->iou;
 | |
| 
 | |
|   sigmask = NULL;
 | |
|   if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
 | |
|     sigemptyset(&sigset);
 | |
|     sigaddset(&sigset, SIGPROF);
 | |
|     sigmask = &sigset;
 | |
|   }
 | |
| 
 | |
|   assert(timeout >= -1);
 | |
|   base = loop->time;
 | |
|   count = 48; /* Benchmarks suggest this gives the best throughput. */
 | |
|   real_timeout = timeout;
 | |
| 
 | |
|   if (lfields->flags & UV_METRICS_IDLE_TIME) {
 | |
|     reset_timeout = 1;
 | |
|     user_timeout = timeout;
 | |
|     timeout = 0;
 | |
|   } else {
 | |
|     reset_timeout = 0;
 | |
|     user_timeout = 0;
 | |
|   }
 | |
| 
 | |
|   epollfd = loop->backend_fd;
 | |
| 
 | |
|   memset(&e, 0, sizeof(e));
 | |
| 
 | |
|   while (!uv__queue_empty(&loop->watcher_queue)) {
 | |
|     q = uv__queue_head(&loop->watcher_queue);
 | |
|     w = uv__queue_data(q, uv__io_t, watcher_queue);
 | |
|     uv__queue_remove(q);
 | |
|     uv__queue_init(q);
 | |
| 
 | |
|     op = EPOLL_CTL_MOD;
 | |
|     if (w->events == 0)
 | |
|       op = EPOLL_CTL_ADD;
 | |
| 
 | |
|     w->events = w->pevents;
 | |
|     e.events = w->pevents;
 | |
|     e.data.fd = w->fd;
 | |
| 
 | |
|     uv__epoll_ctl_prep(epollfd, ctl, &prep, op, w->fd, &e);
 | |
|   }
 | |
| 
 | |
|   inv.events = events;
 | |
|   inv.prep = &prep;
 | |
|   inv.nfds = -1;
 | |
| 
 | |
|   for (;;) {
 | |
|     if (loop->nfds == 0)
 | |
|       if (iou->in_flight == 0)
 | |
|         break;
 | |
| 
 | |
|     /* All event mask mutations should be visible to the kernel before
 | |
|      * we enter epoll_pwait().
 | |
|      */
 | |
|     if (ctl->ringfd != -1)
 | |
|       while (*ctl->sqhead != *ctl->sqtail)
 | |
|         uv__epoll_ctl_flush(epollfd, ctl, &prep);
 | |
| 
 | |
|     /* Only need to set the provider_entry_time if timeout != 0. The function
 | |
|      * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME.
 | |
|      */
 | |
|     if (timeout != 0)
 | |
|       uv__metrics_set_provider_entry_time(loop);
 | |
| 
 | |
|     /* Store the current timeout in a location that's globally accessible so
 | |
|      * other locations like uv__work_done() can determine whether the queue
 | |
|      * of events in the callback were waiting when poll was called.
 | |
|      */
 | |
|     lfields->current_timeout = timeout;
 | |
| 
 | |
|     nfds = epoll_pwait(epollfd, events, ARRAY_SIZE(events), timeout, sigmask);
 | |
| 
 | |
|     /* Update loop->time unconditionally. It's tempting to skip the update when
 | |
|      * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
 | |
|      * operating system didn't reschedule our process while in the syscall.
 | |
|      */
 | |
|     SAVE_ERRNO(uv__update_time(loop));
 | |
| 
 | |
|     if (nfds == 0) {
 | |
|       assert(timeout != -1);
 | |
| 
 | |
|       if (reset_timeout != 0) {
 | |
|         timeout = user_timeout;
 | |
|         reset_timeout = 0;
 | |
|       }
 | |
| 
 | |
|       if (timeout == -1)
 | |
|         continue;
 | |
| 
 | |
|       if (timeout == 0)
 | |
|         break;
 | |
| 
 | |
|       /* We may have been inside the system call for longer than |timeout|
 | |
|        * milliseconds so we need to update the timestamp to avoid drift.
 | |
|        */
 | |
|       goto update_timeout;
 | |
|     }
 | |
| 
 | |
|     if (nfds == -1) {
 | |
|       if (errno != EINTR)
 | |
|         abort();
 | |
| 
 | |
|       if (reset_timeout != 0) {
 | |
|         timeout = user_timeout;
 | |
|         reset_timeout = 0;
 | |
|       }
 | |
| 
 | |
|       if (timeout == -1)
 | |
|         continue;
 | |
| 
 | |
|       if (timeout == 0)
 | |
|         break;
 | |
| 
 | |
|       /* Interrupted by a signal. Update timeout and poll again. */
 | |
|       goto update_timeout;
 | |
|     }
 | |
| 
 | |
|     have_iou_events = 0;
 | |
|     have_signals = 0;
 | |
|     nevents = 0;
 | |
| 
 | |
|     inv.nfds = nfds;
 | |
|     lfields->inv = &inv;
 | |
| 
 | |
|     for (i = 0; i < nfds; i++) {
 | |
|       pe = events + i;
 | |
|       fd = pe->data.fd;
 | |
| 
 | |
|       /* Skip invalidated events, see uv__platform_invalidate_fd */
 | |
|       if (fd == -1)
 | |
|         continue;
 | |
| 
 | |
|       if (fd == iou->ringfd) {
 | |
|         uv__poll_io_uring(loop, iou);
 | |
|         have_iou_events = 1;
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       assert(fd >= 0);
 | |
|       assert((unsigned) fd < loop->nwatchers);
 | |
| 
 | |
|       w = loop->watchers[fd];
 | |
| 
 | |
|       if (w == NULL) {
 | |
|         /* File descriptor that we've stopped watching, disarm it.
 | |
|          *
 | |
|          * Ignore all errors because we may be racing with another thread
 | |
|          * when the file descriptor is closed.
 | |
|          */
 | |
|         uv__epoll_ctl_prep(epollfd, ctl, &prep, EPOLL_CTL_DEL, fd, pe);
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
      /* Give users only events they're interested in. Prevents spurious
       * callbacks when a previous callback invocation in this loop has stopped
       * the current watcher. Also filters out events that the user has not
       * requested us to watch.
 | |
|        */
 | |
|       pe->events &= w->pevents | POLLERR | POLLHUP;
 | |
| 
 | |
|       /* Work around an epoll quirk where it sometimes reports just the
 | |
|        * EPOLLERR or EPOLLHUP event.  In order to force the event loop to
 | |
|        * move forward, we merge in the read/write events that the watcher
 | |
|        * is interested in; uv__read() and uv__write() will then deal with
 | |
|        * the error or hangup in the usual fashion.
 | |
|        *
 | |
|        * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user
 | |
|        * reads the available data, calls uv_read_stop(), then sometime later
 | |
|        * calls uv_read_start() again.  By then, libuv has forgotten about the
 | |
|        * hangup and the kernel won't report EPOLLIN again because there's
 | |
|        * nothing left to read.  If anything, libuv is to blame here.  The
 | |
|        * current hack is just a quick bandaid; to properly fix it, libuv
 | |
|        * needs to remember the error/hangup event.  We should get that for
 | |
|        * free when we switch over to edge-triggered I/O.
 | |
|        */
 | |
|       if (pe->events == POLLERR || pe->events == POLLHUP)
 | |
|         pe->events |=
 | |
|           w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI);
 | |
| 
 | |
|       if (pe->events != 0) {
 | |
|         /* Run signal watchers last.  This also affects child process watchers
 | |
|          * because those are implemented in terms of signal watchers.
 | |
|          */
 | |
|         if (w == &loop->signal_io_watcher) {
 | |
|           have_signals = 1;
 | |
|         } else {
 | |
|           uv__metrics_update_idle_time(loop);
 | |
|           w->cb(loop, w, pe->events);
 | |
|         }
 | |
| 
 | |
|         nevents++;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     uv__metrics_inc_events(loop, nevents);
 | |
|     if (reset_timeout != 0) {
 | |
|       timeout = user_timeout;
 | |
|       reset_timeout = 0;
 | |
|       uv__metrics_inc_events_waiting(loop, nevents);
 | |
|     }
 | |
| 
 | |
|     if (have_signals != 0) {
 | |
|       uv__metrics_update_idle_time(loop);
 | |
|       loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
 | |
|     }
 | |
| 
 | |
|     lfields->inv = NULL;
 | |
| 
 | |
|     if (have_iou_events != 0)
 | |
|       break;  /* Event loop should cycle now so don't poll again. */
 | |
| 
 | |
|     if (have_signals != 0)
 | |
|       break;  /* Event loop should cycle now so don't poll again. */
 | |
| 
 | |
|     if (nevents != 0) {
 | |
|       if (nfds == ARRAY_SIZE(events) && --count != 0) {
 | |
|         /* Poll for more events but don't block this time. */
 | |
|         timeout = 0;
 | |
|         continue;
 | |
|       }
 | |
|       break;
 | |
|     }
 | |
| 
 | |
|     if (timeout == 0)
 | |
|       break;
 | |
| 
 | |
|     if (timeout == -1)
 | |
|       continue;
 | |
| 
 | |
| update_timeout:
 | |
|     assert(timeout > 0);
 | |
| 
 | |
|     real_timeout -= (loop->time - base);
 | |
|     if (real_timeout <= 0)
 | |
|       break;
 | |
| 
 | |
|     timeout = real_timeout;
 | |
|   }
 | |
| 
 | |
|   if (ctl->ringfd != -1)
 | |
|     while (*ctl->sqhead != *ctl->sqtail)
 | |
|       uv__epoll_ctl_flush(epollfd, ctl, &prep);
 | |
| }
 | |
| 
 | |
| uint64_t uv__hrtime(uv_clocktype_t type) {
 | |
|   static _Atomic clock_t fast_clock_id = -1;
 | |
|   struct timespec t;
 | |
|   clock_t clock_id;
 | |
| 
 | |
|   /* Prefer CLOCK_MONOTONIC_COARSE if available but only when it has
 | |
|    * millisecond granularity or better.  CLOCK_MONOTONIC_COARSE is
 | |
|    * serviced entirely from the vDSO, whereas CLOCK_MONOTONIC may
 | |
|    * decide to make a costly system call.
 | |
|    */
 | |
|   /* TODO(bnoordhuis) Use CLOCK_MONOTONIC_COARSE for UV_CLOCK_PRECISE
 | |
|    * when it has microsecond granularity or better (unlikely).
 | |
|    */
 | |
|   clock_id = CLOCK_MONOTONIC;
 | |
|   if (type != UV_CLOCK_FAST)
 | |
|     goto done;
 | |
| 
 | |
|   clock_id = atomic_load_explicit(&fast_clock_id, memory_order_relaxed);
 | |
|   if (clock_id != -1)
 | |
|     goto done;
 | |
| 
 | |
|   clock_id = CLOCK_MONOTONIC;
 | |
|   if (0 == clock_getres(CLOCK_MONOTONIC_COARSE, &t))
 | |
|     if (t.tv_nsec <= 1 * 1000 * 1000)
 | |
|       clock_id = CLOCK_MONOTONIC_COARSE;
 | |
| 
 | |
|   atomic_store_explicit(&fast_clock_id, clock_id, memory_order_relaxed);
 | |
| 
 | |
| done:
 | |
| 
 | |
|   if (clock_gettime(clock_id, &t))
 | |
|     return 0;  /* Not really possible. */
 | |
| 
 | |
|   return t.tv_sec * (uint64_t) 1e9 + t.tv_nsec;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv_resident_set_memory(size_t* rss) {
 | |
|   char buf[1024];
 | |
|   const char* s;
 | |
|   ssize_t n;
 | |
|   long val;
 | |
|   int fd;
 | |
|   int i;
 | |
| 
 | |
|   do
 | |
|     fd = open("/proc/self/stat", O_RDONLY);
 | |
|   while (fd == -1 && errno == EINTR);
 | |
| 
 | |
|   if (fd == -1)
 | |
|     return UV__ERR(errno);
 | |
| 
 | |
|   do
 | |
|     n = read(fd, buf, sizeof(buf) - 1);
 | |
|   while (n == -1 && errno == EINTR);
 | |
| 
 | |
|   uv__close(fd);
 | |
|   if (n == -1)
 | |
|     return UV__ERR(errno);
 | |
|   buf[n] = '\0';
 | |
| 
 | |
|   s = strchr(buf, ' ');
 | |
|   if (s == NULL)
 | |
|     goto err;
 | |
| 
 | |
|   s += 1;
 | |
|   if (*s != '(')
 | |
|     goto err;
 | |
| 
 | |
|   s = strchr(s, ')');
 | |
|   if (s == NULL)
 | |
|     goto err;
 | |
| 
 | |
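  /* Skip ahead to field 24 of /proc/self/stat (rss, measured in pages);
   * fields 1 and 2 (pid and comm) were consumed above.
   */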
|   for (i = 1; i <= 22; i++) {
 | |
|     s = strchr(s + 1, ' ');
 | |
|     if (s == NULL)
 | |
|       goto err;
 | |
|   }
 | |
| 
 | |
|   errno = 0;
 | |
|   val = strtol(s, NULL, 10);
 | |
|   if (errno != 0)
 | |
|     goto err;
 | |
|   if (val < 0)
 | |
|     goto err;
 | |
| 
 | |
|   *rss = val * getpagesize();
 | |
|   return 0;
 | |
| 
 | |
| err:
 | |
|   return UV_EINVAL;
 | |
| }
 | |
| 
 | |
| int uv_uptime(double* uptime) {
 | |
|   struct timespec now;
 | |
|   char buf[128];
 | |
| 
 | |
|   /* Consult /proc/uptime when present (common case), or fall back to
 | |
|    * clock_gettime. Why not always clock_gettime? It doesn't always return the
 | |
|    * right result under OpenVZ and possibly other containerized environments.
 | |
|    */
 | |
|   if (0 == uv__slurp("/proc/uptime", buf, sizeof(buf)))
 | |
|     if (1 == sscanf(buf, "%lf", uptime))
 | |
|       return 0;
 | |
| 
 | |
|   if (clock_gettime(CLOCK_BOOTTIME, &now))
 | |
|     return UV__ERR(errno);
 | |
| 
 | |
|   *uptime = now.tv_sec;
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv_cpu_info(uv_cpu_info_t** ci, int* count) {
 | |
| #if defined(__PPC__)
 | |
|   static const char model_marker[] = "cpu\t\t: ";
 | |
| #elif defined(__arm__)
 | |
|   static const char model_marker[] = "Processor\t: ";
 | |
| #elif defined(__aarch64__)
 | |
|   static const char model_marker[] = "CPU part\t: ";
 | |
| #elif defined(__mips__)
 | |
|   static const char model_marker[] = "cpu model\t\t: ";
 | |
| #elif defined(__loongarch__)
 | |
|   static const char model_marker[] = "cpu family\t\t: ";
 | |
| #else
 | |
|   static const char model_marker[] = "model name\t: ";
 | |
| #endif
 | |
|   static const char parts[] =
 | |
| #ifdef __aarch64__
 | |
|     "0x811\nARM810\n"       "0x920\nARM920\n"      "0x922\nARM922\n"
 | |
|     "0x926\nARM926\n"       "0x940\nARM940\n"      "0x946\nARM946\n"
 | |
|     "0x966\nARM966\n"       "0xa20\nARM1020\n"      "0xa22\nARM1022\n"
 | |
|     "0xa26\nARM1026\n"      "0xb02\nARM11 MPCore\n" "0xb36\nARM1136\n"
 | |
|     "0xb56\nARM1156\n"      "0xb76\nARM1176\n"      "0xc05\nCortex-A5\n"
 | |
|     "0xc07\nCortex-A7\n"    "0xc08\nCortex-A8\n"    "0xc09\nCortex-A9\n"
 | |
|     "0xc0d\nCortex-A17\n"   /* Originally A12 */
 | |
|     "0xc0f\nCortex-A15\n"   "0xc0e\nCortex-A17\n"   "0xc14\nCortex-R4\n"
 | |
|     "0xc15\nCortex-R5\n"    "0xc17\nCortex-R7\n"    "0xc18\nCortex-R8\n"
 | |
|     "0xc20\nCortex-M0\n"    "0xc21\nCortex-M1\n"    "0xc23\nCortex-M3\n"
 | |
|     "0xc24\nCortex-M4\n"    "0xc27\nCortex-M7\n"    "0xc60\nCortex-M0+\n"
 | |
|     "0xd01\nCortex-A32\n"   "0xd03\nCortex-A53\n"   "0xd04\nCortex-A35\n"
 | |
|     "0xd05\nCortex-A55\n"   "0xd06\nCortex-A65\n"   "0xd07\nCortex-A57\n"
 | |
|     "0xd08\nCortex-A72\n"   "0xd09\nCortex-A73\n"   "0xd0a\nCortex-A75\n"
 | |
|     "0xd0b\nCortex-A76\n"   "0xd0c\nNeoverse-N1\n"  "0xd0d\nCortex-A77\n"
 | |
|     "0xd0e\nCortex-A76AE\n" "0xd13\nCortex-R52\n"   "0xd20\nCortex-M23\n"
 | |
|     "0xd21\nCortex-M33\n"   "0xd41\nCortex-A78\n"   "0xd42\nCortex-A78AE\n"
 | |
|     "0xd4a\nNeoverse-E1\n"  "0xd4b\nCortex-A78C\n"
 | |
| #endif
 | |
|     "";
 | |
|   struct cpu {
 | |
|     unsigned long long freq, user, nice, sys, idle, irq;
 | |
|     unsigned model;
 | |
|   };
 | |
|   FILE* fp;
 | |
|   char* p;
 | |
|   int found;
 | |
|   int n;
 | |
|   unsigned i;
 | |
|   unsigned cpu;
 | |
|   unsigned maxcpu;
 | |
|   unsigned size;
 | |
|   unsigned long long skip;
 | |
|   struct cpu (*cpus)[8192];  /* Kernel maximum. */
 | |
|   struct cpu* c;
 | |
|   struct cpu t;
 | |
|   char (*model)[64];
 | |
|   unsigned char bitmap[ARRAY_SIZE(*cpus) / 8];
 | |
|   /* Assumption: even big.LITTLE systems will have only a handful
 | |
|    * of different CPU models. Most systems will just have one.
 | |
|    */
 | |
|   char models[8][64];
 | |
|   char buf[1024];
 | |
| 
 | |
|   memset(bitmap, 0, sizeof(bitmap));
 | |
|   memset(models, 0, sizeof(models));
 | |
|   snprintf(*models, sizeof(*models), "unknown");
 | |
|   maxcpu = 0;
 | |
| 
 | |
|   cpus = uv__calloc(ARRAY_SIZE(*cpus), sizeof(**cpus));
 | |
|   if (cpus == NULL)
 | |
|     return UV_ENOMEM;
 | |
| 
 | |
|   fp = uv__open_file("/proc/stat");
 | |
|   if (fp == NULL) {
 | |
|     uv__free(cpus);
 | |
|     return UV__ERR(errno);
 | |
|   }
 | |
| 
 | |
|   fgets(buf, sizeof(buf), fp);  /* Skip first line. */
 | |
| 
 | |
|   for (;;) {
 | |
|     memset(&t, 0, sizeof(t));
 | |
| 
 | |
|     n = fscanf(fp, "cpu%u %llu %llu %llu %llu %llu %llu",
 | |
|                &cpu, &t.user, &t.nice, &t.sys, &t.idle, &skip, &t.irq);
 | |
| 
 | |
|     if (n != 7)
 | |
|       break;
 | |
| 
 | |
|     fgets(buf, sizeof(buf), fp);  /* Skip rest of line. */
 | |
| 
 | |
|     if (cpu >= ARRAY_SIZE(*cpus))
 | |
|       continue;
 | |
| 
 | |
|     (*cpus)[cpu] = t;
 | |
| 
 | |
|     bitmap[cpu >> 3] |= 1 << (cpu & 7);
 | |
| 
 | |
|     if (cpu >= maxcpu)
 | |
|       maxcpu = cpu + 1;
 | |
|   }
 | |
| 
 | |
|   fclose(fp);
 | |
| 
 | |
|   fp = uv__open_file("/proc/cpuinfo");
 | |
|   if (fp == NULL)
 | |
|     goto nocpuinfo;
 | |
| 
 | |
|   for (;;) {
 | |
|     if (1 != fscanf(fp, "processor\t: %u\n", &cpu))
 | |
|       break;  /* Parse error. */
 | |
| 
 | |
|     found = 0;
 | |
|     while (!found && fgets(buf, sizeof(buf), fp))
 | |
|       found = !strncmp(buf, model_marker, sizeof(model_marker) - 1);
 | |
| 
 | |
|     if (!found)
 | |
|       goto next;
 | |
| 
 | |
|     p = buf + sizeof(model_marker) - 1;
 | |
|     n = (int) strcspn(p, "\n");
 | |
| 
 | |
|     /* arm64: translate CPU part code to model name. */
 | |
|     if (*parts) {
 | |
|       p = memmem(parts, sizeof(parts) - 1, p, n + 1);
 | |
|       if (p == NULL)
 | |
|         p = "unknown";
 | |
|       else
 | |
|         p += n + 1;
 | |
|       n = (int) strcspn(p, "\n");
 | |
|     }
 | |
| 
 | |
|     found = 0;
 | |
|     for (model = models; !found && model < ARRAY_END(models); model++)
 | |
|       found = !strncmp(p, *model, strlen(*model));
 | |
| 
 | |
|     if (!found)
 | |
|       goto next;
 | |
| 
 | |
|     if (**model == '\0')
 | |
|       snprintf(*model, sizeof(*model), "%.*s", n, p);
 | |
| 
 | |
|     if (cpu < maxcpu)
 | |
|       (*cpus)[cpu].model = model - models;
 | |
| 
 | |
| next:
 | |
|     while (fgets(buf, sizeof(buf), fp))
 | |
|       if (*buf == '\n')
 | |
|         break;
 | |
|   }
 | |
| 
 | |
|   fclose(fp);
 | |
|   fp = NULL;
 | |
| 
 | |
| nocpuinfo:
 | |
| 
 | |
|   n = 0;
 | |
|   for (cpu = 0; cpu < maxcpu; cpu++) {
 | |
|     if (!(bitmap[cpu >> 3] & (1 << (cpu & 7))))
 | |
|       continue;
 | |
| 
 | |
|     n++;
 | |
|     snprintf(buf, sizeof(buf),
 | |
|              "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq", cpu);
 | |
| 
 | |
|     fp = uv__open_file(buf);
 | |
|     if (fp == NULL)
 | |
|       continue;
 | |
| 
 | |
|     fscanf(fp, "%llu", &(*cpus)[cpu].freq);
 | |
|     fclose(fp);
 | |
|     fp = NULL;
 | |
|   }
 | |
| 
 | |
|   size = n * sizeof(**ci) + sizeof(models);
 | |
|   *ci = uv__malloc(size);
 | |
|   *count = 0;
 | |
| 
 | |
|   if (*ci == NULL) {
 | |
|     uv__free(cpus);
 | |
|     return UV_ENOMEM;
 | |
|   }
 | |
| 
 | |
|   *count = n;
 | |
|   p = memcpy(*ci + n, models, sizeof(models));
 | |
| 
 | |
|   i = 0;
 | |
|   for (cpu = 0; cpu < maxcpu; cpu++) {
 | |
|     if (!(bitmap[cpu >> 3] & (1 << (cpu & 7))))
 | |
|       continue;
 | |
| 
 | |
|     c = *cpus + cpu;
 | |
| 
 | |
|     (*ci)[i++] = (uv_cpu_info_t) {
 | |
|       .model     = p + c->model * sizeof(*model),
 | |
|       .speed     = c->freq / 1000,
 | |
|       /* Note: sysconf(_SC_CLK_TCK) is fixed at 100 Hz,
 | |
|        * therefore the multiplier is always 1000/100 = 10.
 | |
|        */
 | |
|       .cpu_times = (struct uv_cpu_times_s) {
 | |
|         .user = 10 * c->user,
 | |
|         .nice = 10 * c->nice,
 | |
|         .sys  = 10 * c->sys,
 | |
|         .idle = 10 * c->idle,
 | |
|         .irq  = 10 * c->irq,
 | |
|       },
 | |
|     };
 | |
|   }
 | |
| 
 | |
|   uv__free(cpus);
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
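| /* Illustrative usage sketch (not part of libuv): enumerating CPUs through
|  * the public uv_cpu_info()/uv_free_cpu_info() pair. Per the code above,
|  * .speed is reported in MHz and the cpu_times fields in milliseconds
|  * (clock ticks scaled by 10).
|  *
|  *   uv_cpu_info_t* infos;
|  *   int count;
|  *   int i;
|  *
|  *   if (0 == uv_cpu_info(&infos, &count)) {
|  *     for (i = 0; i < count; i++)
|  *       printf("cpu%d: %s @ %d MHz, idle %llu ms\n",
|  *              i, infos[i].model, infos[i].speed,
|  *              (unsigned long long) infos[i].cpu_times.idle);
|  *     uv_free_cpu_info(infos, count);
|  *   }
|  */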
 | |
| 
 | |
| #ifdef HAVE_IFADDRS_H
 | |
| static int uv__ifaddr_exclude(struct ifaddrs *ent, int exclude_type) {
 | |
|   if (!((ent->ifa_flags & IFF_UP) && (ent->ifa_flags & IFF_RUNNING)))
 | |
|     return 1;
 | |
|   if (ent->ifa_addr == NULL)
 | |
|     return 1;
 | |
|   /*
 | |
|    * On Linux getifaddrs returns information related to the raw underlying
 | |
|    * devices. We're not interested in this information yet.
 | |
|    */
 | |
|   if (ent->ifa_addr->sa_family == PF_PACKET)
 | |
|     return exclude_type;
 | |
|   return !exclude_type;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| int uv_interface_addresses(uv_interface_address_t** addresses, int* count) {
 | |
| #ifndef HAVE_IFADDRS_H
 | |
|   *count = 0;
 | |
|   *addresses = NULL;
 | |
|   return UV_ENOSYS;
 | |
| #else
 | |
|   struct ifaddrs *addrs, *ent;
 | |
|   uv_interface_address_t* address;
 | |
|   int i;
 | |
|   struct sockaddr_ll *sll;
 | |
| 
 | |
|   *count = 0;
 | |
|   *addresses = NULL;
 | |
| 
 | |
|   if (getifaddrs(&addrs))
 | |
|     return UV__ERR(errno);
 | |
| 
 | |
|   /* Count the number of interfaces */
 | |
|   for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
 | |
|     if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR))
 | |
|       continue;
 | |
| 
 | |
|     (*count)++;
 | |
|   }
 | |
| 
 | |
|   if (*count == 0) {
 | |
|     freeifaddrs(addrs);
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   /* Make sure the memory is initialized to zero using calloc(). */
 | |
|   *addresses = uv__calloc(*count, sizeof(**addresses));
 | |
|   if (!(*addresses)) {
 | |
|     freeifaddrs(addrs);
 | |
|     return UV_ENOMEM;
 | |
|   }
 | |
| 
 | |
|   address = *addresses;
 | |
| 
 | |
|   for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
 | |
|     if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR))
 | |
|       continue;
 | |
| 
 | |
|     address->name = uv__strdup(ent->ifa_name);
 | |
| 
 | |
|     if (ent->ifa_addr->sa_family == AF_INET6) {
 | |
|       address->address.address6 = *((struct sockaddr_in6*) ent->ifa_addr);
 | |
|     } else {
 | |
|       address->address.address4 = *((struct sockaddr_in*) ent->ifa_addr);
 | |
|     }
 | |
| 
 | |
|     if (ent->ifa_netmask->sa_family == AF_INET6) {
 | |
|       address->netmask.netmask6 = *((struct sockaddr_in6*) ent->ifa_netmask);
 | |
|     } else {
 | |
|       address->netmask.netmask4 = *((struct sockaddr_in*) ent->ifa_netmask);
 | |
|     }
 | |
| 
 | |
|     address->is_internal = !!(ent->ifa_flags & IFF_LOOPBACK);
 | |
| 
 | |
|     address++;
 | |
|   }
 | |
| 
 | |
|   /* Fill in physical addresses for each interface */
 | |
|   for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
 | |
|     if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFPHYS))
 | |
|       continue;
 | |
| 
 | |
|     address = *addresses;
 | |
| 
 | |
|     for (i = 0; i < (*count); i++) {
 | |
|       size_t namelen = strlen(ent->ifa_name);
 | |
|       /* Alias interfaces share the same physical address. */
 | |
|       if (strncmp(address->name, ent->ifa_name, namelen) == 0 &&
 | |
|           (address->name[namelen] == 0 || address->name[namelen] == ':')) {
 | |
|         sll = (struct sockaddr_ll*)ent->ifa_addr;
 | |
|         memcpy(address->phys_addr, sll->sll_addr, sizeof(address->phys_addr));
 | |
|       }
 | |
|       address++;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   freeifaddrs(addrs);
 | |
| 
 | |
|   return 0;
 | |
| #endif
 | |
| }
 | |
| 
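| /* Illustrative usage sketch (not part of libuv): listing IPv4 addresses
|  * with the public API implemented above; uv_ip4_name() is the address
|  * formatting helper declared in uv.h.
|  *
|  *   uv_interface_address_t* addrs;
|  *   char ip[64];
|  *   int count;
|  *   int i;
|  *
|  *   if (0 == uv_interface_addresses(&addrs, &count)) {
|  *     for (i = 0; i < count; i++)
|  *       if (addrs[i].address.address4.sin_family == AF_INET &&
|  *           0 == uv_ip4_name(&addrs[i].address.address4, ip, sizeof(ip)))
|  *         printf("%s%s %s\n", addrs[i].name,
|  *                addrs[i].is_internal ? " (internal)" : "", ip);
|  *     uv_free_interface_addresses(addrs, count);
|  *   }
|  */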
 | |
| 
 | |
| void uv_free_interface_addresses(uv_interface_address_t* addresses,
 | |
|   int count) {
 | |
|   int i;
 | |
| 
 | |
|   for (i = 0; i < count; i++) {
 | |
|     uv__free(addresses[i].name);
 | |
|   }
 | |
| 
 | |
|   uv__free(addresses);
 | |
| }
 | |
| 
 | |
| 
 | |
| void uv__set_process_title(const char* title) {
 | |
| #if defined(PR_SET_NAME)
 | |
|   prctl(PR_SET_NAME, title);  /* Copies at most 16 bytes, nul included. */
 | |
| #endif
 | |
| }
 | |
| 
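| /* Illustrative note (not part of libuv): PR_SET_NAME stores at most 16
|  * bytes including the terminating nul, so longer titles are silently cut
|  * short. A hypothetical direct call:
|  *
|  *   prctl(PR_SET_NAME, "a-rather-long-thread-name");
|  *   // /proc/self/comm now reads "a-rather-long-t"
|  */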
 | |
| 
 | |
| static uint64_t uv__read_proc_meminfo(const char* what) {
 | |
|   uint64_t rc;
 | |
|   char* p;
 | |
|   char buf[4096];  /* Large enough to hold all of /proc/meminfo. */
 | |
| 
 | |
|   if (uv__slurp("/proc/meminfo", buf, sizeof(buf)))
 | |
|     return 0;
 | |
| 
 | |
|   p = strstr(buf, what);
 | |
| 
 | |
|   if (p == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   p += strlen(what);
 | |
| 
 | |
|   rc = 0;
 | |
|   sscanf(p, "%" PRIu64 " kB", &rc);
 | |
| 
 | |
|   return rc * 1024;
 | |
| }
 | |
| 
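| /* Worked example (illustrative values): given a /proc/meminfo line such as
|  * "MemAvailable:    1048576 kB",
|  *
|  *   uint64_t avail = uv__read_proc_meminfo("MemAvailable:");
|  *   // avail == 1048576 * 1024; a read or parse failure yields 0 so the
|  *   // callers below can fall back to sysinfo().
|  */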
 | |
| 
 | |
| uint64_t uv_get_free_memory(void) {
 | |
|   struct sysinfo info;
 | |
|   uint64_t rc;
 | |
| 
 | |
|   rc = uv__read_proc_meminfo("MemAvailable:");
 | |
| 
 | |
|   if (rc != 0)
 | |
|     return rc;
 | |
| 
 | |
|   if (0 == sysinfo(&info))
 | |
|     return (uint64_t) info.freeram * info.mem_unit;
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| uint64_t uv_get_total_memory(void) {
 | |
|   struct sysinfo info;
 | |
|   uint64_t rc;
 | |
| 
 | |
|   rc = uv__read_proc_meminfo("MemTotal:");
 | |
| 
 | |
|   if (rc != 0)
 | |
|     return rc;
 | |
| 
 | |
|   if (0 == sysinfo(&info))
 | |
|     return (uint64_t) info.totalram * info.mem_unit;
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static uint64_t uv__read_uint64(const char* filename) {
 | |
|   char buf[32];  /* Large enough to hold an encoded uint64_t. */
 | |
|   uint64_t rc;
 | |
| 
 | |
|   rc = 0;
 | |
|   if (0 == uv__slurp(filename, buf, sizeof(buf)))
 | |
|     if (1 != sscanf(buf, "%" PRIu64, &rc))
 | |
|       if (0 == strcmp(buf, "max\n"))
 | |
|         rc = UINT64_MAX;
 | |
| 
 | |
|   return rc;
 | |
| }
 | |
| 
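| /* Worked example (illustrative): the three outcomes of uv__read_uint64.
|  *
|  *   file contents    return value
|  *   "4294967296\n"   4294967296
|  *   "max\n"          UINT64_MAX  (cgroup2 "no limit" marker)
|  *   <unreadable>     0
|  */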
 | |
| 
 | |
| /* Given a buffer with the contents of a cgroup v1 /proc/self/cgroup,
 | |
|  * finds the location and length of the memory controller mount path.
 | |
|  * This disregards the leading / for easy concatenation of paths.
 | |
|  * Returns NULL if the memory controller wasn't found. */
 | |
| static char* uv__cgroup1_find_memory_controller(char buf[static 1024],
 | |
|                                                 int* n) {
 | |
|   char* p;
 | |
| 
 | |
|   /* Seek to the memory controller line. */
 | |
|   p = strchr(buf, ':');
 | |
|   while (p != NULL && strncmp(p, ":memory:", 8)) {
 | |
|     p = strchr(p, '\n');
 | |
|     if (p != NULL)
 | |
|       p = strchr(p, ':');
 | |
|   }
 | |
| 
 | |
|   if (p != NULL) {
 | |
|     /* Determine the length of the mount path. */
 | |
|     p = p + strlen(":memory:/");
 | |
|     *n = (int) strcspn(p, "\n");
 | |
|   }
 | |
| 
 | |
|   return p;
 | |
| }
 | |
| 
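| /* Worked example (illustrative, hypothetical cgroup v1 contents): given a
|  * buffer holding
|  *
|  *   "12:pids:/init.scope\n9:memory:/user.slice\n"
|  *
|  * uv__cgroup1_find_memory_controller() returns a pointer to "user.slice"
|  * (the leading '/' is skipped) and sets *n to 10, ready to be spliced into
|  * "/sys/fs/cgroup/memory/%.*s/..." below.
|  */
|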
 | |
| static void uv__get_cgroup1_memory_limits(char buf[static 1024], uint64_t* high,
 | |
|                                           uint64_t* max) {
 | |
|   char filename[4097];
 | |
|   char* p;
 | |
|   int n;
 | |
|   uint64_t cgroup1_max;
 | |
| 
 | |
|   /* Find out where the controller is mounted. */
 | |
|   p = uv__cgroup1_find_memory_controller(buf, &n);
 | |
|   if (p != NULL) {
 | |
|     snprintf(filename, sizeof(filename),
 | |
|              "/sys/fs/cgroup/memory/%.*s/memory.soft_limit_in_bytes", n, p);
 | |
|     *high = uv__read_uint64(filename);
 | |
| 
 | |
|     snprintf(filename, sizeof(filename),
 | |
|              "/sys/fs/cgroup/memory/%.*s/memory.limit_in_bytes", n, p);
 | |
|     *max = uv__read_uint64(filename);
 | |
| 
 | |
|     /* If the controller wasn't mounted, the reads above will have failed,
 | |
|      * as indicated by uv__read_uint64 returning 0.
 | |
|      */
 | |
|     if (*high != 0 && *max != 0)
|       goto update_limits;
 | |
|   }
 | |
| 
 | |
|   /* Fall back to the limits of the global memory controller. */
 | |
|   *high = uv__read_uint64("/sys/fs/cgroup/memory/memory.soft_limit_in_bytes");
 | |
|   *max = uv__read_uint64("/sys/fs/cgroup/memory/memory.limit_in_bytes");
 | |
| 
 | |
|   /* uv__read_uint64 detects cgroup2's "max", so we need to separately detect
 | |
|    * cgroup1's maximum value (which is derived from LONG_MAX and PAGE_SIZE).
 | |
|    */
 | |
| update_limits:
 | |
|   cgroup1_max = LONG_MAX & ~(sysconf(_SC_PAGESIZE) - 1);
 | |
|   if (*high == cgroup1_max)
 | |
|     *high = UINT64_MAX;
 | |
|   if (*max == cgroup1_max)
 | |
|     *max = UINT64_MAX;
 | |
| }
 | |
| 
 | |
| static void uv__get_cgroup2_memory_limits(char buf[static 1024], uint64_t* high,
 | |
|                                           uint64_t* max) {
 | |
|   char filename[4097];
 | |
|   char* p;
 | |
|   int n;
 | |
| 
 | |
|   /* Find out where the controller is mounted. */
 | |
|   p = buf + strlen("0::/");
 | |
|   n = (int) strcspn(p, "\n");
 | |
| 
 | |
|   /* Read the memory limits of the controller. */
 | |
|   snprintf(filename, sizeof(filename), "/sys/fs/cgroup/%.*s/memory.max", n, p);
 | |
|   *max = uv__read_uint64(filename);
 | |
|   snprintf(filename, sizeof(filename), "/sys/fs/cgroup/%.*s/memory.high", n, p);
 | |
|   *high = uv__read_uint64(filename);
 | |
| }
 | |
| 
 | |
| static uint64_t uv__get_cgroup_constrained_memory(char buf[static 1024]) {
 | |
|   uint64_t high;
 | |
|   uint64_t max;
 | |
| 
 | |
|   /* In the case of cgroupv2, we'll only have a single entry. */
 | |
|   if (strncmp(buf, "0::/", 4))
 | |
|     uv__get_cgroup1_memory_limits(buf, &high, &max);
 | |
|   else
 | |
|     uv__get_cgroup2_memory_limits(buf, &high, &max);
 | |
| 
 | |
|   if (high == 0 || max == 0)
 | |
|     return 0;
 | |
| 
 | |
|   return high < max ? high : max;
 | |
| }
 | |
| 
 | |
| uint64_t uv_get_constrained_memory(void) {
 | |
|   char buf[1024];
 | |
| 
 | |
|   if (uv__slurp("/proc/self/cgroup", buf, sizeof(buf)))
 | |
|     return 0;
 | |
| 
 | |
|   return uv__get_cgroup_constrained_memory(buf);
 | |
| }
 | |
| 
 | |
| 
 | |
| static uint64_t uv__get_cgroup1_current_memory(char buf[static 1024]) {
 | |
|   char filename[4097];
 | |
|   uint64_t current;
 | |
|   char* p;
 | |
|   int n;
 | |
| 
 | |
|   /* Find out where the controller is mounted. */
 | |
|   p = uv__cgroup1_find_memory_controller(buf, &n);
 | |
|   if (p != NULL) {
 | |
|     snprintf(filename, sizeof(filename),
 | |
|             "/sys/fs/cgroup/memory/%.*s/memory.usage_in_bytes", n, p);
 | |
|     current = uv__read_uint64(filename);
 | |
| 
 | |
|     /* If the controller wasn't mounted, the reads above will have failed,
 | |
|      * as indicated by uv__read_uint64 returning 0.
 | |
|      */
 | |
|     if (current != 0)
 | |
|       return current;
 | |
|   }
 | |
| 
 | |
|   /* Fall back to the usage of the global memory controller. */
 | |
|   return uv__read_uint64("/sys/fs/cgroup/memory/memory.usage_in_bytes");
 | |
| }
 | |
| 
 | |
| static uint64_t uv__get_cgroup2_current_memory(char buf[static 1024]) {
 | |
|   char filename[4097];
 | |
|   char* p;
 | |
|   int n;
 | |
| 
 | |
|   /* Find out where the controller is mounted. */
 | |
|   p = buf + strlen("0::/");
 | |
|   n = (int) strcspn(p, "\n");
 | |
| 
 | |
|   snprintf(filename, sizeof(filename),
 | |
|            "/sys/fs/cgroup/%.*s/memory.current", n, p);
 | |
|   return uv__read_uint64(filename);
 | |
| }
 | |
| 
 | |
| uint64_t uv_get_available_memory(void) {
 | |
|   char buf[1024];
 | |
|   uint64_t constrained;
 | |
|   uint64_t current;
 | |
|   uint64_t total;
 | |
| 
 | |
|   if (uv__slurp("/proc/self/cgroup", buf, sizeof(buf)))
 | |
|     return 0;
 | |
| 
 | |
|   constrained = uv__get_cgroup_constrained_memory(buf);
 | |
|   if (constrained == 0)
 | |
|     return uv_get_free_memory();
 | |
| 
 | |
|   total = uv_get_total_memory();
 | |
|   if (constrained > total)
 | |
|     return uv_get_free_memory();
 | |
| 
 | |
|   /* In the case of cgroupv2, we'll only have a single entry. */
 | |
|   if (strncmp(buf, "0::/", 4))
 | |
|     current = uv__get_cgroup1_current_memory(buf);
 | |
|   else
 | |
|     current = uv__get_cgroup2_current_memory(buf);
 | |
| 
 | |
|   /* Memory usage can be higher than the limit for short bursts of time. */
 | |
|   if (constrained < current)
 | |
|     return 0;
 | |
| 
 | |
|   return constrained - current;
 | |
| }
 | |
| 
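| /* Illustrative usage sketch (not part of libuv): how the memory queries
|  * above fit together. uv_get_constrained_memory() reports 0 when no cgroup
|  * limit applies; in that case, or when the limit exceeds total memory,
|  * uv_get_available_memory() falls back to uv_get_free_memory(), otherwise
|  * it is the limit minus the cgroup's current usage, clamped at zero.
|  *
|  *   uint64_t total = uv_get_total_memory();
|  *   uint64_t limit = uv_get_constrained_memory();
|  *   uint64_t avail = uv_get_available_memory();
|  *
|  *   printf("total %llu, limit %llu, available %llu\n",
|  *          (unsigned long long) total,
|  *          (unsigned long long) limit,
|  *          (unsigned long long) avail);
|  */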
 | |
| 
 | |
| void uv_loadavg(double avg[3]) {
 | |
|   struct sysinfo info;
 | |
|   char buf[128];  /* Large enough to hold all of /proc/loadavg. */
 | |
| 
 | |
|   if (0 == uv__slurp("/proc/loadavg", buf, sizeof(buf)))
 | |
|     if (3 == sscanf(buf, "%lf %lf %lf", &avg[0], &avg[1], &avg[2]))
 | |
|       return;
 | |
| 
 | |
|   if (sysinfo(&info) < 0)
 | |
|     return;
 | |
| 
 | |
|   avg[0] = (double) info.loads[0] / 65536.0;
 | |
|   avg[1] = (double) info.loads[1] / 65536.0;
 | |
|   avg[2] = (double) info.loads[2] / 65536.0;
 | |
| }
 | |
| 
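| /* Illustrative usage sketch (not part of libuv): reading the 1, 5 and 15
|  * minute load averages through the wrapper above.
|  *
|  *   double avg[3] = { 0.0, 0.0, 0.0 };
|  *
|  *   uv_loadavg(avg);
|  *   printf("load: %.2f %.2f %.2f\n", avg[0], avg[1], avg[2]);
|  */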
 | |
| 
 | |
| static int compare_watchers(const struct watcher_list* a,
 | |
|                             const struct watcher_list* b) {
 | |
|   if (a->wd < b->wd) return -1;
 | |
|   if (a->wd > b->wd) return 1;
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int init_inotify(uv_loop_t* loop) {
 | |
|   int fd;
 | |
| 
 | |
|   if (loop->inotify_fd != -1)
 | |
|     return 0;
 | |
| 
 | |
|   fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
 | |
|   if (fd < 0)
 | |
|     return UV__ERR(errno);
 | |
| 
 | |
|   loop->inotify_fd = fd;
 | |
|   uv__io_init(&loop->inotify_read_watcher, uv__inotify_read, loop->inotify_fd);
 | |
|   uv__io_start(loop, &loop->inotify_read_watcher, POLLIN);
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int uv__inotify_fork(uv_loop_t* loop, struct watcher_list* root) {
 | |
|   /* Open the inotify_fd, and re-arm all the inotify watchers. */
 | |
|   int err;
 | |
|   struct watcher_list* tmp_watcher_list_iter;
 | |
|   struct watcher_list* watcher_list;
 | |
|   struct watcher_list tmp_watcher_list;
 | |
|   struct uv__queue queue;
 | |
|   struct uv__queue* q;
 | |
|   uv_fs_event_t* handle;
 | |
|   char* tmp_path;
 | |
| 
 | |
|   if (root == NULL)
 | |
|     return 0;
 | |
| 
 | |
|   /* We must restore the old watcher list to be able to close items
 | |
|    * out of it.
 | |
|    */
 | |
|   loop->inotify_watchers = root;
 | |
| 
 | |
|   uv__queue_init(&tmp_watcher_list.watchers);
 | |
|   /* Note that the queue we use is shared with uv_fs_event_start() and
|    * uv_fs_event_stop(), making uv__queue_foreach unsafe to use. So we use the
 | |
|    * uv__queue_move trick to safely iterate. Also don't free the watcher
 | |
|    * list until we're done iterating. c.f. uv__inotify_read.
 | |
|    */
 | |
|   RB_FOREACH_SAFE(watcher_list, watcher_root,
 | |
|                   uv__inotify_watchers(loop), tmp_watcher_list_iter) {
 | |
|     watcher_list->iterating = 1;
 | |
|     uv__queue_move(&watcher_list->watchers, &queue);
 | |
|     while (!uv__queue_empty(&queue)) {
 | |
|       q = uv__queue_head(&queue);
 | |
|       handle = uv__queue_data(q, uv_fs_event_t, watchers);
 | |
|       /* It's critical to keep a copy of path here, because it
 | |
|        * will be set to NULL by stop() and then deallocated by
 | |
|        * maybe_free_watcher_list
 | |
|        */
 | |
|       tmp_path = uv__strdup(handle->path);
 | |
|       assert(tmp_path != NULL);
 | |
|       uv__queue_remove(q);
 | |
|       uv__queue_insert_tail(&watcher_list->watchers, q);
 | |
|       uv_fs_event_stop(handle);
 | |
| 
 | |
|       uv__queue_insert_tail(&tmp_watcher_list.watchers, &handle->watchers);
 | |
|       handle->path = tmp_path;
 | |
|     }
 | |
|     watcher_list->iterating = 0;
 | |
|     maybe_free_watcher_list(watcher_list, loop);
 | |
|   }
 | |
| 
 | |
|   uv__queue_move(&tmp_watcher_list.watchers, &queue);
 | |
|   while (!uv__queue_empty(&queue)) {
 | |
|       q = uv__queue_head(&queue);
 | |
|       uv__queue_remove(q);
 | |
|       handle = uv__queue_data(q, uv_fs_event_t, watchers);
 | |
|       tmp_path = handle->path;
 | |
|       handle->path = NULL;
 | |
|       err = uv_fs_event_start(handle, handle->cb, tmp_path, 0);
 | |
|       uv__free(tmp_path);
 | |
|       if (err)
 | |
|         return err;
 | |
|   }
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static struct watcher_list* find_watcher(uv_loop_t* loop, int wd) {
 | |
|   struct watcher_list w;
 | |
|   w.wd = wd;
 | |
|   return RB_FIND(watcher_root, uv__inotify_watchers(loop), &w);
 | |
| }
 | |
| 
 | |
| 
 | |
| static void maybe_free_watcher_list(struct watcher_list* w, uv_loop_t* loop) {
 | |
|   /* if the watcher_list->watchers is being iterated over, we can't free it. */
 | |
|   if ((!w->iterating) && uv__queue_empty(&w->watchers)) {
 | |
|     /* No watchers left for this path. Clean up. */
 | |
|     RB_REMOVE(watcher_root, uv__inotify_watchers(loop), w);
 | |
|     inotify_rm_watch(loop->inotify_fd, w->wd);
 | |
|     uv__free(w);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static void uv__inotify_read(uv_loop_t* loop,
 | |
|                              uv__io_t* dummy,
 | |
|                              unsigned int events) {
 | |
|   const struct inotify_event* e;
 | |
|   struct watcher_list* w;
 | |
|   uv_fs_event_t* h;
 | |
|   struct uv__queue queue;
 | |
|   struct uv__queue* q;
 | |
|   const char* path;
 | |
|   ssize_t size;
 | |
|   const char *p;
 | |
|   /* needs to be large enough for sizeof(inotify_event) + strlen(path) */
 | |
|   char buf[4096];
 | |
| 
 | |
|   for (;;) {
 | |
|     do
 | |
|       size = read(loop->inotify_fd, buf, sizeof(buf));
 | |
|     while (size == -1 && errno == EINTR);
 | |
| 
 | |
|     if (size == -1) {
 | |
|       assert(errno == EAGAIN || errno == EWOULDBLOCK);
 | |
|       break;
 | |
|     }
 | |
| 
 | |
|     assert(size > 0); /* Pre-2.6.21: size 0 == read buffer too small. */
 | |
| 
 | |
|     /* Now we have one or more inotify_event structs. */
 | |
|     for (p = buf; p < buf + size; p += sizeof(*e) + e->len) {
 | |
|       e = (const struct inotify_event*) p;
 | |
| 
 | |
|       events = 0;
 | |
|       if (e->mask & (IN_ATTRIB|IN_MODIFY))
 | |
|         events |= UV_CHANGE;
 | |
|       if (e->mask & ~(IN_ATTRIB|IN_MODIFY))
 | |
|         events |= UV_RENAME;
 | |
| 
 | |
|       w = find_watcher(loop, e->wd);
 | |
|       if (w == NULL)
 | |
|         continue; /* Stale event, no watchers left. */
 | |
| 
 | |
|       /* inotify does not return the filename when monitoring a single file
 | |
|        * for modifications. Repurpose the filename for API compatibility.
 | |
|        * I'm not convinced this is a good thing, maybe it should go.
 | |
|        */
 | |
|       path = e->len ? (const char*) (e + 1) : uv__basename_r(w->path);
 | |
| 
 | |
|       /* We're about to iterate over the queue and call user's callbacks.
 | |
|        * What can go wrong?
 | |
|        * A callback could call uv_fs_event_stop()
 | |
|        * and the queue can change under our feet.
 | |
|        * So, we use uv__queue_move() trick to safely iterate over the queue.
 | |
|        * And we don't free the watcher_list until we're done iterating.
 | |
|        *
 | |
|        * First,
 | |
|        * tell uv_fs_event_stop() (that could be called from a user's callback)
 | |
|        * not to free watcher_list.
 | |
|        */
 | |
|       w->iterating = 1;
 | |
|       uv__queue_move(&w->watchers, &queue);
 | |
|       while (!uv__queue_empty(&queue)) {
 | |
|         q = uv__queue_head(&queue);
 | |
|         h = uv__queue_data(q, uv_fs_event_t, watchers);
 | |
| 
 | |
|         uv__queue_remove(q);
 | |
|         uv__queue_insert_tail(&w->watchers, q);
 | |
| 
 | |
|         h->cb(h, path, events, 0);
 | |
|       }
 | |
|       /* done iterating, time to (maybe) free empty watcher_list */
 | |
|       w->iterating = 0;
 | |
|       maybe_free_watcher_list(w, loop);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv_fs_event_init(uv_loop_t* loop, uv_fs_event_t* handle) {
 | |
|   uv__handle_init(loop, (uv_handle_t*)handle, UV_FS_EVENT);
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv_fs_event_start(uv_fs_event_t* handle,
 | |
|                       uv_fs_event_cb cb,
 | |
|                       const char* path,
 | |
|                       unsigned int flags) {
 | |
|   struct watcher_list* w;
 | |
|   uv_loop_t* loop;
 | |
|   size_t len;
 | |
|   int events;
 | |
|   int err;
 | |
|   int wd;
 | |
| 
 | |
|   if (uv__is_active(handle))
 | |
|     return UV_EINVAL;
 | |
| 
 | |
|   loop = handle->loop;
 | |
| 
 | |
|   err = init_inotify(loop);
 | |
|   if (err)
 | |
|     return err;
 | |
| 
 | |
|   events = IN_ATTRIB
 | |
|          | IN_CREATE
 | |
|          | IN_MODIFY
 | |
|          | IN_DELETE
 | |
|          | IN_DELETE_SELF
 | |
|          | IN_MOVE_SELF
 | |
|          | IN_MOVED_FROM
 | |
|          | IN_MOVED_TO;
 | |
| 
 | |
|   wd = inotify_add_watch(loop->inotify_fd, path, events);
 | |
|   if (wd == -1)
 | |
|     return UV__ERR(errno);
 | |
| 
 | |
|   w = find_watcher(loop, wd);
 | |
|   if (w)
 | |
|     goto no_insert;
 | |
| 
 | |
|   len = strlen(path) + 1;
 | |
|   w = uv__malloc(sizeof(*w) + len);
 | |
|   if (w == NULL)
 | |
|     return UV_ENOMEM;
 | |
| 
 | |
|   w->wd = wd;
 | |
|   w->path = memcpy(w + 1, path, len);
 | |
|   uv__queue_init(&w->watchers);
 | |
|   w->iterating = 0;
 | |
|   RB_INSERT(watcher_root, uv__inotify_watchers(loop), w);
 | |
| 
 | |
| no_insert:
 | |
|   uv__handle_start(handle);
 | |
|   uv__queue_insert_tail(&w->watchers, &handle->watchers);
 | |
|   handle->path = w->path;
 | |
|   handle->cb = cb;
 | |
|   handle->wd = wd;
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| int uv_fs_event_stop(uv_fs_event_t* handle) {
 | |
|   struct watcher_list* w;
 | |
| 
 | |
|   if (!uv__is_active(handle))
 | |
|     return 0;
 | |
| 
 | |
|   w = find_watcher(handle->loop, handle->wd);
 | |
|   assert(w != NULL);
 | |
| 
 | |
|   handle->wd = -1;
 | |
|   handle->path = NULL;
 | |
|   uv__handle_stop(handle);
 | |
|   uv__queue_remove(&handle->watchers);
 | |
| 
 | |
|   maybe_free_watcher_list(w, handle->loop);
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| void uv__fs_event_close(uv_fs_event_t* handle) {
 | |
|   uv_fs_event_stop(handle);
 | |
| }
 |
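|
| /* Illustrative usage sketch (not part of libuv): watching a directory with
|  * the inotify-backed uv_fs_event implementation above. The callback gets
|  * UV_RENAME and/or UV_CHANGE bits plus, when inotify supplies one, the name
|  * of the affected directory entry.
|  *
|  *   static void on_change(uv_fs_event_t* handle,
|  *                         const char* filename,
|  *                         int events,
|  *                         int status) {
|  *     if (status == 0)
|  *       printf("%s:%s%s\n", filename ? filename : "?",
|  *              (events & UV_RENAME) ? " rename" : "",
|  *              (events & UV_CHANGE) ? " change" : "");
|  *   }
|  *
|  *   uv_fs_event_t watcher;
|  *   uv_fs_event_init(uv_default_loop(), &watcher);
|  *   uv_fs_event_start(&watcher, on_change, "/tmp", 0);
|  *   uv_run(uv_default_loop(), UV_RUN_DEFAULT);
|  */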