Monitor what is modifiying your files
Files | Log | Commits | Refs | README
Author: SM
Date: 2025-08-22
Subject: initial
commit f86b6edfe4ec0250a25b5dd25a04a3c4c74ec34b
Author: SM <seb.michalk@gmail.com>
Date: Fri Aug 22 16:07:54 2025 +0200
initial
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f07438c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,19 @@
+CC = gcc
+CFLAGS = -std=c99 -Wall -Wextra -O2 -D_POSIX_C_SOURCE=200809L
+TARGET = who
+SOURCES = main.c proc.c
+
+.PHONY: all clean install
+
+all: $(TARGET)
+
+$(TARGET): $(SOURCES)
+ $(CC) $(CFLAGS) -o $(TARGET) $(SOURCES)
+
+clean:
+ rm -f $(TARGET)
+
+install: $(TARGET)
+ install -m 755 $(TARGET) /usr/local/bin/
+
+.SUFFIXES: .c .o
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d60ec2b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,70 @@
+# who
+
+who is a simple file monitor that tracks which processes modify files
+
+## usage
+
+ who [-o output] directory
+
+monitor `directory` recursively and log file changes to `output` (default: who.log)
+
+## build
+
+ make
+
+requires linux with netlink connector support
+
+## how it works
+
+- uses inotify for file events
+- uses netlink connector for process events
+- correlates file changes to processes via /proc filesystem
+- filters kernel threads and long-running daemons
+- prefers recently started user processes
+
+## output format
+
+ timestamp action path pid=N uid=N gid=N comm=name cwd=dir
+
+## requirements
+
+- linux 2.6.14+
+- root privileges (for netlink connector)
+- gcc with c99 support
+
+## files
+
+ main.c main program
+ proc.c /proc filesystem utilities
+ proc.h process definitions
+ Makefile build rules
+
+## architecture
+
+uses kernel apis directly instead of heavyweight frameworks:
+
+- netlink connector catches all process lifecycle events (fork/exec/exit)
+- inotify provides efficient file change notifications
+- /proc filesystem gives process context (cwd, uid, comm)
+- epoll multiplexes events in single thread
+
+correlation heuristic: prefer recently started processes with directory access
+
+who tracks process lifecycle events via netlink. processes started before
+it's initialization are not tracked and may be incorrectly attributed to
+long-running parent processes (shells, multiplexers). start who before
+launching monitored applications for accurate correlation.
+
+## limitations
+
+- linux specific (netlink, /proc, inotify)
+- requires root for netlink connector
+- correlation is heuristic, not guaranteed accurate
+- no support for containers/namespaces
+- limited to MAX_PROCS (1024) tracked processes
+- long pathnames may be truncated
+- processes started before who cannot be correlated accurately
+
+## license
+
+MIT
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..d20dda0
--- /dev/null
+++ b/main.c
@@ -0,0 +1,583 @@
+/* See LICENSE file for copyright and license details.
+ * who - file change process tracker
+ * Monitors directory recursively and tracks which processes modify files
+ */
+
+#define _POSIX_C_SOURCE 200809L
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/inotify.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <dirent.h>
+#include <libgen.h>
+#include <limits.h>
+
+#include <linux/cn_proc.h>
+#include <linux/connector.h>
+#include <linux/netlink.h>
+
+#include "proc.h"
+
+#define MAX_EVENTS 10
+#define MAX_PROCS 1024
+#define MAX_PATH 4096
+#define PROC_BUF_SIZE 1024
+
+enum {
+ STATE_INIT,
+ STATE_MONITORING,
+ STATE_SHUTDOWN
+};
+
+typedef struct FileEvent {
+ char path[MAX_PATH];
+ time_t ts;
+ uint32_t mask;
+} FileEvent;
+
+/* global state */
+static int state = STATE_INIT;
+static int nlfd = -1;
+static int ifd = -1;
+static int efd = -1;
+static FILE *out = NULL;
+static Process procs[MAX_PROCS];
+static int nprocs = 0;
+static char dir[MAX_PATH];
+
+/* function declarations */
+static void die(const char *fmt, ...);
+static void usage(void);
+static int initnetlink(void);
+static int initinotify(const char *dir);
+static int addwatch(int fd, const char *path);
+static void handleproc(void);
+static void handlefile(void);
+static Process *findproc(pid_t pid);
+static void addproc(pid_t pid, pid_t ppid, const char *comm);
+static void rmproc(pid_t pid);
+static Process *correlate(const char *path, time_t timestamp);
+static void logchange(const char *path, Process *proc, uint32_t mask);
+static void cleanup(void);
+static void sighandler(int sig);
+static void scanprocs(void);
+
+static void
+die(const char *fmt, ...)
+{
+ va_list ap;
+ int saved_errno;
+
+ saved_errno = errno;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ if (fmt[0] && fmt[strlen(fmt)-1] == ':')
+ fprintf(stderr, " %s", strerror(saved_errno));
+ fputc('\n', stderr);
+
+ exit(1);
+}
+
+static void
+usage(void)
+{
+ die("usage: who [-o output] directory\n");
+}
+
+int
+main(int argc, char *argv[])
+{
+ const char *outpath;
+ int opt, nfds;
+ struct epoll_event events[MAX_EVENTS];
+
+ outpath = "who.log";
+
+ while ((opt = getopt(argc, argv, "o:")) != -1) {
+ switch (opt) {
+ case 'o':
+ outpath = optarg;
+ break;
+ default:
+ usage();
+ }
+ }
+
+ if (optind >= argc)
+ usage();
+
+ if (strlen(argv[optind]) >= MAX_PATH)
+ die("directory path too long");
+
+ strcpy(dir, argv[optind]);
+
+ signal(SIGINT, sighandler);
+ signal(SIGTERM, sighandler);
+
+ out = fopen(outpath, "w");
+ if (!out)
+ die("fopen %s:", outpath);
+
+ efd = epoll_create1(EPOLL_CLOEXEC);
+ if (efd == -1)
+ die("epoll_create1:");
+
+ nlfd = initnetlink();
+ ifd = initinotify(dir);
+
+ scanprocs();
+
+ state = STATE_MONITORING;
+ fprintf(out, "# Who - started monitoring %s\n", dir);
+ fprintf(out, "# Active processes: %d\n", nprocs);
+ fflush(out);
+
+ while (state == STATE_MONITORING) {
+ nfds = epoll_wait(efd, events, MAX_EVENTS, -1);
+ if (nfds == -1) {
+ if (errno == EINTR)
+ continue;
+ die("epoll_wait:");
+ }
+
+ for (int i = 0; i < nfds; i++) {
+ if (events[i].data.fd == nlfd) {
+ handleproc();
+ } else if (events[i].data.fd == ifd) {
+ handlefile();
+ }
+ }
+ }
+
+ cleanup();
+ return 0;
+}
+
+static void
+sighandler(int sig)
+{
+ (void)sig;
+ state = STATE_SHUTDOWN;
+}
+
+static int
+initnetlink(void)
+{
+ struct sockaddr_nl sa_nl;
+ struct epoll_event ev;
+ int sock;
+
+ sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ if (sock == -1)
+ die("socket:");
+
+ memset(&sa_nl, 0, sizeof(sa_nl));
+ sa_nl.nl_family = AF_NETLINK;
+ sa_nl.nl_groups = CN_IDX_PROC;
+ sa_nl.nl_pid = getpid();
+
+ if (bind(sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1)
+ die("bind:");
+
+ /* Increase socket buffer size */
+ int bufsize = 65536;
+ if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)) == -1)
+ die("setsockopt:");
+
+ struct {
+ struct nlmsghdr nl_hdr;
+ struct cn_msg cn_msg;
+ enum proc_cn_mcast_op cn_mcast;
+ } msg;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.nl_hdr.nlmsg_len = sizeof(msg);
+ msg.nl_hdr.nlmsg_type = NLMSG_DONE;
+ msg.nl_hdr.nlmsg_flags = 0;
+ msg.nl_hdr.nlmsg_seq = 0;
+ msg.nl_hdr.nlmsg_pid = getpid();
+
+ msg.cn_msg.id.idx = CN_IDX_PROC;
+ msg.cn_msg.id.val = CN_VAL_PROC;
+ msg.cn_msg.len = sizeof(enum proc_cn_mcast_op);
+ msg.cn_mcast = PROC_CN_MCAST_LISTEN;
+
+ if (send(sock, &msg, sizeof(msg), 0) == -1)
+ die("send:");
+
+ ev.events = EPOLLIN;
+ ev.data.fd = sock;
+ if (epoll_ctl(efd, EPOLL_CTL_ADD, sock, &ev) == -1)
+ die("epoll_ctl:");
+
+ return sock;
+}
+
+static void
+handleproc(void)
+{
+ struct {
+ struct nlmsghdr nl_hdr;
+ struct cn_msg cn_msg;
+ struct proc_event proc_ev;
+ } msg;
+ ssize_t len;
+ struct proc_event *ev;
+
+ len = recv(nlfd, &msg, sizeof(msg), 0);
+ if (len == -1) {
+ if (errno == EINTR || errno == EAGAIN)
+ return;
+ die("recv:");
+ }
+
+ if (len < (ssize_t)(sizeof(msg.nl_hdr) + sizeof(msg.cn_msg)))
+ return;
+
+ ev = &msg.proc_ev;
+
+ switch (ev->what) {
+ case PROC_EVENT_EXEC:
+ addproc(ev->event_data.exec.process_pid,
+ ev->event_data.exec.process_tgid,
+ "unknown");
+ break;
+ case PROC_EVENT_EXIT:
+ rmproc(ev->event_data.exit.process_pid);
+ break;
+ case PROC_EVENT_FORK:
+ addproc(ev->event_data.fork.child_pid,
+ ev->event_data.fork.parent_pid,
+ "unknown");
+ break;
+ default:
+ break;
+ }
+}
+
+static Process *
+findproc(pid_t pid)
+{
+ int i;
+
+ for (i = 0; i < nprocs; i++) {
+ if (procs[i].active && procs[i].pid == pid)
+ return &procs[i];
+ }
+ return NULL;
+}
+
+static void
+addproc(pid_t pid, pid_t ppid, const char *comm)
+{
+ Process *proc;
+ int i;
+
+ proc = findproc(pid);
+ if (!proc) {
+ for (i = 0; i < MAX_PROCS; i++) {
+ if (!procs[i].active) {
+ proc = &procs[i];
+ nprocs++;
+ break;
+ }
+ }
+ }
+
+ if (!proc)
+ return;
+
+ proc->pid = pid;
+ proc->ppid = ppid;
+ proc->active = 1;
+ proc->start = time(NULL);
+ strncpy(proc->comm, comm, sizeof(proc->comm) - 1);
+ proc->comm[sizeof(proc->comm) - 1] = '\0';
+
+ updateproc(proc);
+}
+
+static void
+rmproc(pid_t pid)
+{
+ Process *proc;
+
+ proc = findproc(pid);
+ if (proc) {
+ proc->active = 0;
+ nprocs--;
+ }
+}
+
+static int
+initinotify(const char *dir)
+{
+ struct epoll_event ev;
+ int fd;
+
+ fd = inotify_init1(IN_CLOEXEC);
+ if (fd == -1)
+ die("inotify_init1:");
+
+ if (addwatch(fd, dir) == -1) {
+ close(fd);
+ die("add_watch_recursive:");
+ }
+
+ ev.events = EPOLLIN;
+ ev.data.fd = fd;
+ if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) == -1) {
+ close(fd);
+ die("epoll_ctl:");
+ }
+
+ return fd;
+}
+
+static int
+addwatch(int fd, const char *path)
+{
+ DIR *dir;
+ struct dirent *entry;
+ struct stat statbuf;
+ char fullpath[MAX_PATH];
+ int wd;
+
+ wd = inotify_add_watch(fd, path, IN_CREATE | IN_DELETE | IN_MODIFY |
+ IN_MOVED_FROM | IN_MOVED_TO | IN_CLOSE_WRITE);
+ if (wd == -1)
+ return -1;
+
+ dir = opendir(path);
+ if (!dir)
+ return -1;
+
+ while ((entry = readdir(dir)) != NULL) {
+ if (strcmp(entry->d_name, ".") == 0 ||
+ strcmp(entry->d_name, "..") == 0)
+ continue;
+
+ if (snprintf(fullpath, sizeof(fullpath), "%s/%s",
+ path, entry->d_name) >= (int)sizeof(fullpath)) {
+ closedir(dir);
+ return -1;
+ }
+
+ if (stat(fullpath, &statbuf) == -1)
+ continue;
+
+ if (S_ISDIR(statbuf.st_mode)) {
+ if (addwatch(fd, fullpath) == -1) {
+ closedir(dir);
+ return -1;
+ }
+ }
+ }
+
+ closedir(dir);
+ return 0;
+}
+
+static void
+handlefile(void)
+{
+ char buffer[4096], path[MAX_PATH], *ptr;
+ ssize_t len;
+ struct inotify_event *event;
+ Process *proc;
+
+ len = read(ifd, buffer, sizeof(buffer));
+ if (len == -1) {
+ if (errno == EINTR || errno == EAGAIN)
+ return;
+ die("read:");
+ }
+
+ ptr = buffer;
+ while (ptr < buffer + len) {
+ event = (struct inotify_event *)ptr;
+
+ if (event->len > 0) {
+ if (dir[strlen(dir) - 1] == '/') {
+ snprintf(path, sizeof(path), "%s%s",
+ dir, event->name);
+ } else {
+ snprintf(path, sizeof(path), "%s/%s",
+ dir, event->name);
+ }
+
+ proc = correlate(path, time(NULL));
+
+ logchange(path, proc, event->mask);
+
+ if (event->mask & IN_CREATE) {
+ struct stat statbuf;
+ if (stat(path, &statbuf) == 0 &&
+ S_ISDIR(statbuf.st_mode)) {
+ addwatch(ifd, path);
+ }
+ }
+ }
+
+ ptr += sizeof(struct inotify_event) + event->len;
+ }
+}
+
+
+static int
+hasaccess(Process *proc, const char *filepath)
+{
+ char *fdir, *dpath;
+ int cwdlen, dpathlen;
+
+ if (!(fdir = strdup(filepath)))
+ return 0;
+ dpath = dirname(fdir);
+
+ if (!proc->cwd[0] || !strcmp(proc->cwd, "unknown")) {
+ free(fdir);
+ return 0;
+ }
+
+ cwdlen = strlen(proc->cwd);
+ dpathlen = strlen(dpath);
+
+ int result = !strcmp(dpath, proc->cwd) ||
+ (!strncmp(dpath, proc->cwd, cwdlen) && dpath[cwdlen] == '/') ||
+ (!strncmp(proc->cwd, dpath, dpathlen) && proc->cwd[dpathlen] == '/');
+
+ free(fdir);
+ return result;
+}
+
+static Process *
+correlate(const char *path, time_t ts)
+{
+ Process *best, *proc;
+ time_t bestdiff, diff;
+ int i;
+
+ best = NULL;
+ bestdiff = LONG_MAX;
+
+ for (i = 0; i < MAX_PROCS; i++) {
+ proc = &procs[i];
+
+ if (!proc->active || proc->start > ts)
+ continue;
+
+ if (proc->pid < 100 || ts - proc->start > 86400 ||
+ strstr(proc->comm, "kworker") || strstr(proc->comm, "ksoftirqd") ||
+ strstr(proc->comm, "migration") || proc->comm[0] == '[')
+ continue;
+
+ /* Only update process info if it's stale */
+ if (ts - proc->start > 60)
+ updateproc(proc);
+
+ if (!hasaccess(proc, path))
+ continue;
+
+ diff = ts - proc->start;
+ if (diff < bestdiff) {
+ bestdiff = diff;
+ best = proc;
+ }
+ }
+
+ return best;
+}
+
+static const char *
+maskstr(uint32_t mask)
+{
+ if (mask & IN_CREATE)
+ return "CREATE";
+ if (mask & IN_DELETE)
+ return "DELETE";
+ if (mask & IN_MODIFY)
+ return "MODIFY";
+ if (mask & IN_CLOSE_WRITE)
+ return "WRITE";
+ if (mask & IN_MOVED_FROM)
+ return "MOVE_FROM";
+ if (mask & IN_MOVED_TO)
+ return "MOVE_TO";
+ return "UNKNOWN";
+}
+
+static void
+logchange(const char *path, Process *proc, uint32_t mask)
+{
+ time_t now;
+ struct tm *tm;
+ char ts[32];
+
+ now = time(NULL);
+ tm = localtime(&now);
+ strftime(ts, sizeof(ts), "%Y-%m-%d %H:%M:%S", tm);
+
+ if (proc) {
+ fprintf(out, "%s %s %s pid=%d uid=%d gid=%d comm=%s cwd=%s\n",
+ ts, maskstr(mask), path,
+ proc->pid, proc->uid, proc->gid, proc->comm, proc->cwd);
+ } else {
+ fprintf(out, "%s %s %s pid=? uid=? gid=? comm=? cwd=?\n",
+ ts, maskstr(mask), path);
+ }
+
+ fflush(out);
+}
+
+static void
+cleanup(void)
+{
+ if (out) {
+ fprintf(out, "# Who - shutdown\n");
+ fclose(out);
+ }
+ if (nlfd != -1)
+ close(nlfd);
+ if (ifd != -1)
+ close(ifd);
+ if (efd != -1)
+ close(efd);
+}
+
+static void
+scanprocs(void)
+{
+ DIR *proc_dir;
+ struct dirent *entry;
+ pid_t pid;
+ char *endptr;
+
+ proc_dir = opendir("/proc");
+ if (!proc_dir)
+ return;
+
+ while ((entry = readdir(proc_dir)) != NULL) {
+ pid = strtol(entry->d_name, &endptr, 10);
+ if (*endptr != '\0' || pid <= 0)
+ continue;
+
+ addproc(pid, 0, "existing");
+ }
+
+ closedir(proc_dir);
+}
diff --git a/proc.c b/proc.c
new file mode 100644
index 0000000..705e4fe
--- /dev/null
+++ b/proc.c
@@ -0,0 +1,67 @@
+/* See LICENSE file for copyright and license details. */
+
+#define _POSIX_C_SOURCE 200809L
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+void
+getcomm(Process *proc)
+{
+ char path[256], buf[256];
+ FILE *f;
+
+ snprintf(path, sizeof(path), "/proc/%d/comm", proc->pid);
+ if ((f = fopen(path, "r"))) {
+ if (fgets(buf, sizeof(buf), f))
+ buf[strcspn(buf, "\n")] = '\0';
+ strncpy(proc->comm, buf, sizeof(proc->comm) - 1);
+ proc->comm[sizeof(proc->comm) - 1] = '\0';
+ fclose(f);
+ }
+}
+
+void
+getprocwd(Process *proc)
+{
+ char path[256];
+ ssize_t len;
+
+ snprintf(path, sizeof(path), "/proc/%d/cwd", proc->pid);
+ len = readlink(path, proc->cwd, sizeof(proc->cwd) - 1);
+
+ if (len != -1)
+ proc->cwd[len] = '\0';
+ else
+ strcpy(proc->cwd, "unknown");
+}
+
+void
+getids(Process *proc)
+{
+ char path[256], buf[256];
+ FILE *f;
+
+ snprintf(path, sizeof(path), "/proc/%d/status", proc->pid);
+ if ((f = fopen(path, "r"))) {
+ while (fgets(buf, sizeof(buf), f)) {
+ if (!strncmp(buf, "Uid:", 4))
+ sscanf(buf, "Uid:\t%u", &proc->uid);
+ else if (!strncmp(buf, "Gid:", 4))
+ sscanf(buf, "Gid:\t%u", &proc->gid);
+ }
+ fclose(f);
+ }
+}
+
+void
+updateproc(Process *proc)
+{
+ getcomm(proc);
+ getprocwd(proc);
+ getids(proc);
+}
diff --git a/proc.h b/proc.h
new file mode 100644
index 0000000..4cbace9
--- /dev/null
+++ b/proc.h
@@ -0,0 +1,27 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef PROC_H
+#define PROC_H
+
+#include <sys/types.h>
+#include <time.h>
+
+#define MAX_PATH 4096
+
+typedef struct Process {
+ pid_t pid;
+ pid_t ppid;
+ uid_t uid;
+ gid_t gid;
+ char comm[16];
+ char cwd[MAX_PATH];
+ time_t start;
+ int active;
+} Process;
+
+void getcomm(Process *proc);
+void getprocwd(Process *proc);
+void getids(Process *proc);
+void updateproc(Process *proc);
+
+#endif