Monitor what is modifying your files
Files | Log | Commits | Refs | README
Author: SM
Date: 2025-08-22
Subject: initial
commit f86b6edfe4ec0250a25b5dd25a04a3c4c74ec34b Author: SM <seb.michalk@gmail.com> Date: Fri Aug 22 16:07:54 2025 +0200 initial diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f07438c --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +CC = gcc +CFLAGS = -std=c99 -Wall -Wextra -O2 -D_POSIX_C_SOURCE=200809L +TARGET = who +SOURCES = main.c proc.c + +.PHONY: all clean install + +all: $(TARGET) + +$(TARGET): $(SOURCES) + $(CC) $(CFLAGS) -o $(TARGET) $(SOURCES) + +clean: + rm -f $(TARGET) + +install: $(TARGET) + install -m 755 $(TARGET) /usr/local/bin/ + +.SUFFIXES: .c .o diff --git a/README.md b/README.md new file mode 100644 index 0000000..d60ec2b --- /dev/null +++ b/README.md @@ -0,0 +1,70 @@ +# who + +who is a simple file monitor that tracks which processes modify files + +## usage + + who [-o output] directory + +monitor `directory` recursively and log file changes to `output` (default: who.log) + +## build + + make + +requires linux with netlink connector support + +## how it works + +- uses inotify for file events +- uses netlink connector for process events +- correlates file changes to processes via /proc filesystem +- filters kernel threads and long-running daemons +- prefers recently started user processes + +## output format + + timestamp action path pid=N uid=N gid=N comm=name cwd=dir + +## requirements + +- linux 2.6.14+ +- root privileges (for netlink connector) +- gcc with c99 support + +## files + + main.c main program + proc.c /proc filesystem utilities + proc.h process definitions + Makefile build rules + +## architecture + +uses kernel apis directly instead of heavyweight frameworks: + +- netlink connector catches all process lifecycle events (fork/exec/exit) +- inotify provides efficient file change notifications +- /proc filesystem gives process context (cwd, uid, comm) +- epoll multiplexes events in single thread + +correlation heuristic: prefer recently started processes with directory access + +who tracks process lifecycle events 
via netlink. processes started before +it's initialization are not tracked and may be incorrectly attributed to +long-running parent processes (shells, multiplexers). start who before +launching monitored applications for accurate correlation. + +## limitations + +- linux specific (netlink, /proc, inotify) +- requires root for netlink connector +- correlation is heuristic, not guaranteed accurate +- no support for containers/namespaces +- limited to MAX_PROCS (1024) tracked processes +- long pathnames may be truncated +- processes started before who cannot be correlated accurately + +## license + +MIT diff --git a/main.c b/main.c new file mode 100644 index 0000000..d20dda0 --- /dev/null +++ b/main.c @@ -0,0 +1,583 @@ +/* See LICENSE file for copyright and license details. + * who - file change process tracker + * Monitors directory recursively and tracks which processes modify files + */ + +#define _POSIX_C_SOURCE 200809L + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/inotify.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include <dirent.h> +#include <libgen.h> +#include <limits.h> + +#include <linux/cn_proc.h> +#include <linux/connector.h> +#include <linux/netlink.h> + +#include "proc.h" + +#define MAX_EVENTS 10 +#define MAX_PROCS 1024 +#define MAX_PATH 4096 +#define PROC_BUF_SIZE 1024 + +enum { + STATE_INIT, + STATE_MONITORING, + STATE_SHUTDOWN +}; + +typedef struct FileEvent { + char path[MAX_PATH]; + time_t ts; + uint32_t mask; +} FileEvent; + +/* global state */ +static int state = STATE_INIT; +static int nlfd = -1; +static int ifd = -1; +static int efd = -1; +static FILE *out = NULL; +static Process procs[MAX_PROCS]; +static int nprocs = 0; +static char dir[MAX_PATH]; + +/* function declarations */ +static void die(const char *fmt, ...); +static void 
usage(void); +static int initnetlink(void); +static int initinotify(const char *dir); +static int addwatch(int fd, const char *path); +static void handleproc(void); +static void handlefile(void); +static Process *findproc(pid_t pid); +static void addproc(pid_t pid, pid_t ppid, const char *comm); +static void rmproc(pid_t pid); +static Process *correlate(const char *path, time_t timestamp); +static void logchange(const char *path, Process *proc, uint32_t mask); +static void cleanup(void); +static void sighandler(int sig); +static void scanprocs(void); + +static void +die(const char *fmt, ...) +{ + va_list ap; + int saved_errno; + + saved_errno = errno; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + if (fmt[0] && fmt[strlen(fmt)-1] == ':') + fprintf(stderr, " %s", strerror(saved_errno)); + fputc('\n', stderr); + + exit(1); +} + +static void +usage(void) +{ + die("usage: who [-o output] directory\n"); +} + +int +main(int argc, char *argv[]) +{ + const char *outpath; + int opt, nfds; + struct epoll_event events[MAX_EVENTS]; + + outpath = "who.log"; + + while ((opt = getopt(argc, argv, "o:")) != -1) { + switch (opt) { + case 'o': + outpath = optarg; + break; + default: + usage(); + } + } + + if (optind >= argc) + usage(); + + if (strlen(argv[optind]) >= MAX_PATH) + die("directory path too long"); + + strcpy(dir, argv[optind]); + + signal(SIGINT, sighandler); + signal(SIGTERM, sighandler); + + out = fopen(outpath, "w"); + if (!out) + die("fopen %s:", outpath); + + efd = epoll_create1(EPOLL_CLOEXEC); + if (efd == -1) + die("epoll_create1:"); + + nlfd = initnetlink(); + ifd = initinotify(dir); + + scanprocs(); + + state = STATE_MONITORING; + fprintf(out, "# Who - started monitoring %s\n", dir); + fprintf(out, "# Active processes: %d\n", nprocs); + fflush(out); + + while (state == STATE_MONITORING) { + nfds = epoll_wait(efd, events, MAX_EVENTS, -1); + if (nfds == -1) { + if (errno == EINTR) + continue; + die("epoll_wait:"); + } + + for (int i = 0; i < 
nfds; i++) { + if (events[i].data.fd == nlfd) { + handleproc(); + } else if (events[i].data.fd == ifd) { + handlefile(); + } + } + } + + cleanup(); + return 0; +} + +static void +sighandler(int sig) +{ + (void)sig; + state = STATE_SHUTDOWN; +} + +static int +initnetlink(void) +{ + struct sockaddr_nl sa_nl; + struct epoll_event ev; + int sock; + + sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + if (sock == -1) + die("socket:"); + + memset(&sa_nl, 0, sizeof(sa_nl)); + sa_nl.nl_family = AF_NETLINK; + sa_nl.nl_groups = CN_IDX_PROC; + sa_nl.nl_pid = getpid(); + + if (bind(sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) + die("bind:"); + + /* Increase socket buffer size */ + int bufsize = 65536; + if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)) == -1) + die("setsockopt:"); + + struct { + struct nlmsghdr nl_hdr; + struct cn_msg cn_msg; + enum proc_cn_mcast_op cn_mcast; + } msg; + + memset(&msg, 0, sizeof(msg)); + msg.nl_hdr.nlmsg_len = sizeof(msg); + msg.nl_hdr.nlmsg_type = NLMSG_DONE; + msg.nl_hdr.nlmsg_flags = 0; + msg.nl_hdr.nlmsg_seq = 0; + msg.nl_hdr.nlmsg_pid = getpid(); + + msg.cn_msg.id.idx = CN_IDX_PROC; + msg.cn_msg.id.val = CN_VAL_PROC; + msg.cn_msg.len = sizeof(enum proc_cn_mcast_op); + msg.cn_mcast = PROC_CN_MCAST_LISTEN; + + if (send(sock, &msg, sizeof(msg), 0) == -1) + die("send:"); + + ev.events = EPOLLIN; + ev.data.fd = sock; + if (epoll_ctl(efd, EPOLL_CTL_ADD, sock, &ev) == -1) + die("epoll_ctl:"); + + return sock; +} + +static void +handleproc(void) +{ + struct { + struct nlmsghdr nl_hdr; + struct cn_msg cn_msg; + struct proc_event proc_ev; + } msg; + ssize_t len; + struct proc_event *ev; + + len = recv(nlfd, &msg, sizeof(msg), 0); + if (len == -1) { + if (errno == EINTR || errno == EAGAIN) + return; + die("recv:"); + } + + if (len < (ssize_t)(sizeof(msg.nl_hdr) + sizeof(msg.cn_msg))) + return; + + ev = &msg.proc_ev; + + switch (ev->what) { + case PROC_EVENT_EXEC: + addproc(ev->event_data.exec.process_pid, + 
ev->event_data.exec.process_tgid, + "unknown"); + break; + case PROC_EVENT_EXIT: + rmproc(ev->event_data.exit.process_pid); + break; + case PROC_EVENT_FORK: + addproc(ev->event_data.fork.child_pid, + ev->event_data.fork.parent_pid, + "unknown"); + break; + default: + break; + } +} + +static Process * +findproc(pid_t pid) +{ + int i; + + for (i = 0; i < nprocs; i++) { + if (procs[i].active && procs[i].pid == pid) + return &procs[i]; + } + return NULL; +} + +static void +addproc(pid_t pid, pid_t ppid, const char *comm) +{ + Process *proc; + int i; + + proc = findproc(pid); + if (!proc) { + for (i = 0; i < MAX_PROCS; i++) { + if (!procs[i].active) { + proc = &procs[i]; + nprocs++; + break; + } + } + } + + if (!proc) + return; + + proc->pid = pid; + proc->ppid = ppid; + proc->active = 1; + proc->start = time(NULL); + strncpy(proc->comm, comm, sizeof(proc->comm) - 1); + proc->comm[sizeof(proc->comm) - 1] = '\0'; + + updateproc(proc); +} + +static void +rmproc(pid_t pid) +{ + Process *proc; + + proc = findproc(pid); + if (proc) { + proc->active = 0; + nprocs--; + } +} + +static int +initinotify(const char *dir) +{ + struct epoll_event ev; + int fd; + + fd = inotify_init1(IN_CLOEXEC); + if (fd == -1) + die("inotify_init1:"); + + if (addwatch(fd, dir) == -1) { + close(fd); + die("add_watch_recursive:"); + } + + ev.events = EPOLLIN; + ev.data.fd = fd; + if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) == -1) { + close(fd); + die("epoll_ctl:"); + } + + return fd; +} + +static int +addwatch(int fd, const char *path) +{ + DIR *dir; + struct dirent *entry; + struct stat statbuf; + char fullpath[MAX_PATH]; + int wd; + + wd = inotify_add_watch(fd, path, IN_CREATE | IN_DELETE | IN_MODIFY | + IN_MOVED_FROM | IN_MOVED_TO | IN_CLOSE_WRITE); + if (wd == -1) + return -1; + + dir = opendir(path); + if (!dir) + return -1; + + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) + continue; + + if (snprintf(fullpath, 
sizeof(fullpath), "%s/%s", + path, entry->d_name) >= (int)sizeof(fullpath)) { + closedir(dir); + return -1; + } + + if (stat(fullpath, &statbuf) == -1) + continue; + + if (S_ISDIR(statbuf.st_mode)) { + if (addwatch(fd, fullpath) == -1) { + closedir(dir); + return -1; + } + } + } + + closedir(dir); + return 0; +} + +static void +handlefile(void) +{ + char buffer[4096], path[MAX_PATH], *ptr; + ssize_t len; + struct inotify_event *event; + Process *proc; + + len = read(ifd, buffer, sizeof(buffer)); + if (len == -1) { + if (errno == EINTR || errno == EAGAIN) + return; + die("read:"); + } + + ptr = buffer; + while (ptr < buffer + len) { + event = (struct inotify_event *)ptr; + + if (event->len > 0) { + if (dir[strlen(dir) - 1] == '/') { + snprintf(path, sizeof(path), "%s%s", + dir, event->name); + } else { + snprintf(path, sizeof(path), "%s/%s", + dir, event->name); + } + + proc = correlate(path, time(NULL)); + + logchange(path, proc, event->mask); + + if (event->mask & IN_CREATE) { + struct stat statbuf; + if (stat(path, &statbuf) == 0 && + S_ISDIR(statbuf.st_mode)) { + addwatch(ifd, path); + } + } + } + + ptr += sizeof(struct inotify_event) + event->len; + } +} + + +static int +hasaccess(Process *proc, const char *filepath) +{ + char *fdir, *dpath; + int cwdlen, dpathlen; + + if (!(fdir = strdup(filepath))) + return 0; + dpath = dirname(fdir); + + if (!proc->cwd[0] || !strcmp(proc->cwd, "unknown")) { + free(fdir); + return 0; + } + + cwdlen = strlen(proc->cwd); + dpathlen = strlen(dpath); + + int result = !strcmp(dpath, proc->cwd) || + (!strncmp(dpath, proc->cwd, cwdlen) && dpath[cwdlen] == '/') || + (!strncmp(proc->cwd, dpath, dpathlen) && proc->cwd[dpathlen] == '/'); + + free(fdir); + return result; +} + +static Process * +correlate(const char *path, time_t ts) +{ + Process *best, *proc; + time_t bestdiff, diff; + int i; + + best = NULL; + bestdiff = LONG_MAX; + + for (i = 0; i < MAX_PROCS; i++) { + proc = &procs[i]; + + if (!proc->active || proc->start > ts) + 
continue; + + if (proc->pid < 100 || ts - proc->start > 86400 || + strstr(proc->comm, "kworker") || strstr(proc->comm, "ksoftirqd") || + strstr(proc->comm, "migration") || proc->comm[0] == '[') + continue; + + /* Only update process info if it's stale */ + if (ts - proc->start > 60) + updateproc(proc); + + if (!hasaccess(proc, path)) + continue; + + diff = ts - proc->start; + if (diff < bestdiff) { + bestdiff = diff; + best = proc; + } + } + + return best; +} + +static const char * +maskstr(uint32_t mask) +{ + if (mask & IN_CREATE) + return "CREATE"; + if (mask & IN_DELETE) + return "DELETE"; + if (mask & IN_MODIFY) + return "MODIFY"; + if (mask & IN_CLOSE_WRITE) + return "WRITE"; + if (mask & IN_MOVED_FROM) + return "MOVE_FROM"; + if (mask & IN_MOVED_TO) + return "MOVE_TO"; + return "UNKNOWN"; +} + +static void +logchange(const char *path, Process *proc, uint32_t mask) +{ + time_t now; + struct tm *tm; + char ts[32]; + + now = time(NULL); + tm = localtime(&now); + strftime(ts, sizeof(ts), "%Y-%m-%d %H:%M:%S", tm); + + if (proc) { + fprintf(out, "%s %s %s pid=%d uid=%d gid=%d comm=%s cwd=%s\n", + ts, maskstr(mask), path, + proc->pid, proc->uid, proc->gid, proc->comm, proc->cwd); + } else { + fprintf(out, "%s %s %s pid=? uid=? gid=? comm=? 
cwd=?\n", + ts, maskstr(mask), path); + } + + fflush(out); +} + +static void +cleanup(void) +{ + if (out) { + fprintf(out, "# Who - shutdown\n"); + fclose(out); + } + if (nlfd != -1) + close(nlfd); + if (ifd != -1) + close(ifd); + if (efd != -1) + close(efd); +} + +static void +scanprocs(void) +{ + DIR *proc_dir; + struct dirent *entry; + pid_t pid; + char *endptr; + + proc_dir = opendir("/proc"); + if (!proc_dir) + return; + + while ((entry = readdir(proc_dir)) != NULL) { + pid = strtol(entry->d_name, &endptr, 10); + if (*endptr != '\0' || pid <= 0) + continue; + + addproc(pid, 0, "existing"); + } + + closedir(proc_dir); +} diff --git a/proc.c b/proc.c new file mode 100644 index 0000000..705e4fe --- /dev/null +++ b/proc.c @@ -0,0 +1,67 @@ +/* See LICENSE file for copyright and license details. */ + +#define _POSIX_C_SOURCE 200809L + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "proc.h" + +void +getcomm(Process *proc) +{ + char path[256], buf[256]; + FILE *f; + + snprintf(path, sizeof(path), "/proc/%d/comm", proc->pid); + if ((f = fopen(path, "r"))) { + if (fgets(buf, sizeof(buf), f)) + buf[strcspn(buf, "\n")] = '\0'; + strncpy(proc->comm, buf, sizeof(proc->comm) - 1); + proc->comm[sizeof(proc->comm) - 1] = '\0'; + fclose(f); + } +} + +void +getprocwd(Process *proc) +{ + char path[256]; + ssize_t len; + + snprintf(path, sizeof(path), "/proc/%d/cwd", proc->pid); + len = readlink(path, proc->cwd, sizeof(proc->cwd) - 1); + + if (len != -1) + proc->cwd[len] = '\0'; + else + strcpy(proc->cwd, "unknown"); +} + +void +getids(Process *proc) +{ + char path[256], buf[256]; + FILE *f; + + snprintf(path, sizeof(path), "/proc/%d/status", proc->pid); + if ((f = fopen(path, "r"))) { + while (fgets(buf, sizeof(buf), f)) { + if (!strncmp(buf, "Uid:", 4)) + sscanf(buf, "Uid:\t%u", &proc->uid); + else if (!strncmp(buf, "Gid:", 4)) + sscanf(buf, "Gid:\t%u", &proc->gid); + } + fclose(f); + } +} + +void +updateproc(Process *proc) +{ + 
getcomm(proc); + getprocwd(proc); + getids(proc); +} diff --git a/proc.h b/proc.h new file mode 100644 index 0000000..4cbace9 --- /dev/null +++ b/proc.h @@ -0,0 +1,27 @@ +/* See LICENSE file for copyright and license details. */ + +#ifndef PROC_H +#define PROC_H + +#include <sys/types.h> +#include <time.h> + +#define MAX_PATH 4096 + +typedef struct Process { + pid_t pid; + pid_t ppid; + uid_t uid; + gid_t gid; + char comm[16]; + char cwd[MAX_PATH]; + time_t start; + int active; +} Process; + +void getcomm(Process *proc); +void getprocwd(Process *proc); +void getids(Process *proc); +void updateproc(Process *proc); + +#endif