/*
 * trust-file.c - Functions for working with trust files
 * Copyright (c) 2020 Red Hat Inc.
 * All Rights Reserved.
 *
 * This software may be freely redistributed and/or modified under the
 * terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING. If not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1335, USA.
 *
 * Authors:
 *   Zoltan Fridrich <zfridric@redhat.com>
 *   Radovan Sroka   <rsroka@redhat.com>
 */

#include "config.h"

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <ftw.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/syslog.h>
#include <sys/types.h>
#include <unistd.h>
#include <uthash.h>

#include "fapolicyd-backend.h"
#include "file.h"
#include "llist.h"
#include "message.h"
#include "trust-file.h"
#include "escape.h"
#include "paths.h"
#include "filter.h"

/*
 * fapolicyd-cli relies on this file to materialize trust entries into
 * linked lists so they can be inspected, deduplicated, and rewritten.
 * The daemon also calls into the same helpers when it needs an in-memory
 * snapshot instead of streaming updates through a memfd.  The routines
 * below therefore serve both the CLI's trust management commands and the
 * daemon's backend, with the CLI-only helpers called out explicitly in
 * their documentation.
 */

/*
 * Size of the buffers used for one trust-file line.
 * NOTE(review): presumably path (4096) + separators + a 10-digit size +
 * the digest string + line terminator/NUL - confirm the meaning of each
 * term before changing it.
 */
#define BUFFER_SIZE (4096+1+1+1+10+1+FILE_DIGEST_STRING_WIDTH+1)
/* scanf/printf style layouts of one entry; not referenced in this file. */
#define FILE_READ_FORMAT  "%4096s %lu %64s" // path size SHA256
#define FILE_WRITE_FORMAT "%s %lu %s\n"     // path size SHA256
/* Passed to nftw() as the maximum number of directories held open. */
#define FTW_NOPENFD 1024
/*
 * nftw() flags: callbacks return FTW_CONTINUE/FTW_STOP (FTW_ACTIONRETVAL)
 * and symbolic links are not followed (FTW_PHYS).
 */
#define FTW_FLAGS (FTW_ACTIONRETVAL | FTW_PHYS)

/*
 * First line of the legacy file format.  When trust_file_load() finds it
 * on line 1 it unescapes every stored path.
 */
#define HEADER_OLD "# AUTOGENERATED FILE VERSION 2\n"

/* Header lines emitted, in this order, by write_out_list(). */
#define HEADER0 "# AUTOGENERATED FILE VERSION 3\n"
#define HEADER1 "# This file contains a list of trusted files\n"
#define HEADER2 "#\n"
#define HEADER3 "#  FULL PATH        SIZE                             SHA256\n"
#define HEADER4 "# /home/user/my-ls 157984 61a9960bf7d255a85811f4afcac51067b8f2e4c75e21cf4f2af95319d4ed1b87\n"


/*
 * File-scope state used to hand arguments to, and collect results from,
 * the nftw() callbacks below (nftw offers no user-data pointer).  The
 * *_all() entry points set these before walking TRUST_DIR_PATH.
 */
/* Accumulator list used by ftw_load() and ftw_rm_duplicates(). */
list_t _list;
/* Path prefix forwarded by ftw_delete_path() and ftw_update_path(). */
char *_path;
/* Running total updated by ftw_delete_path() and ftw_update_path(). */
int _count;
/* use_filter flag forwarded by ftw_update_path(). */
bool _use_filter;
/* memfd forwarded by ftw_load(); -1 selects list output. */
int _memfd = -1;

/*
 * Node of the uthash table trust_file_load() uses to detect paths that
 * appear more than once within a single trust file.
 */
struct trust_seen_entry {
	const char *path;	/* hash key: the entry's (unescaped) path */
	UT_hash_handle hh;	/* uthash bookkeeping */
};


/*
 * make_data_string - Measure a file and build its trust payload.
 * @path: Path of the file to open, stat and hash.
 *
 * Produces a newly allocated string laid out according to DATA_FORMAT
 * (source, size, digest); the path itself is not part of the string
 * because it is stored separately as the index.  The digest is always
 * SHA256.  Ownership of the string passes to the caller, who must free
 * it.  Returns NULL, after logging the reason, when the file cannot be
 * opened, stat'ed, hashed, or when formatting fails.
 */
static char *make_data_string(const char *path)
{
	struct stat info;
	char *digest;
	char *entry = NULL;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		msg(LOG_ERR, "Cannot open %s", path);
		return NULL;
	}

	// The size is both recorded in the entry and handed to the hasher
	if (fstat(fd, &info) != 0) {
		msg(LOG_ERR, "Cannot stat %s", path);
		close(fd);
		return NULL;
	}

	/*
	 * File/DEB trust fragments have only ever stored SHA256 digests.
	 * Keep emitting that format even though the loader understands
	 * other algorithms for RPM-provided data.
	 */
	digest = get_hash_from_fd2(fd, info.st_size, FILE_HASH_ALG_SHA256);
	close(fd);
	if (digest == NULL) {
		msg(LOG_ERR, "Cannot hash %s", path);
		return NULL;
	}

	// "source size sha256"; the source field is written as 0
	int written = asprintf(&entry, DATA_FORMAT, 0,
					  info.st_size, digest);
	free(digest);

	if (written >= 0)
		return entry;

	msg(LOG_ERR, "Cannot format entry for %s", path);
	return NULL;
}
/*
 * write_out_list - Persist a linked list of trust entries to disk.
 * @list: List of entries created by trust_file_load or CLI helpers.
 * @dest: Destination trust file to be rewritten.
 *
 * This helper is used exclusively by the CLI trust management commands
 * after they finish editing an in-memory list.  The file is truncated,
 * the version 3 header is written and every entry follows as
 * "path size digest".  Entries whose data is missing or malformed are
 * skipped with a warning instead of being dereferenced.
 *
 * Returns 0 on success and 1 when the destination could not be opened
 * or a write/close error was detected.  When the file cannot be opened
 * @list is emptied, as callers have historically relied on.
 */
static int write_out_list(list_t *list, const char *dest)
{
	FILE *f = fopen(dest, "w");
	if (!f) {
		msg(LOG_ERR, "Cannot open %s for writing (%s)", dest,
		    strerror(errno));
		list_empty(list);
		return 1;
	}

	int rc = 0;

	// Adjacent literals concatenate, so the header goes out in one call
	if (fputs(HEADER0 HEADER1 HEADER2 HEADER3 HEADER4, f) == EOF)
		rc = 1;

	for (list_item_t *lptr = list->first; lptr; lptr = lptr->next) {
		const char *data = (const char *)(lptr->data);
		const char *path = (const char *)lptr->index;

		// data is NULL when the file could not be measured
		if (path == NULL || data == NULL) {
			msg(LOG_WARNING, "Skipping incomplete entry %s in %s",
			    path ? path : "(null)", dest);
			continue;
		}

		/*
		 * The stored data is "source size digest" but the file
		 * format omits the source number, so skip past the first
		 * field instead of assuming it is a single digit.
		 */
		const char *payload = strchr(data, ' ');
		if (payload == NULL) {
			msg(LOG_WARNING, "Skipping malformed entry %s in %s",
			    path, dest);
			continue;
		}

		// fprintf cannot truncate, unlike a fixed intermediate buffer
		if (fprintf(f, "%s %s\n", path, payload + 1) < 0)
			rc = 1;
	}

	// fclose flushes, so this is where a full disk finally shows up
	if (fclose(f) != 0)
		rc = 1;

	if (rc)
		msg(LOG_ERR, "Error writing %s", dest);
	return rc;
}

/*
 * trust_file_append - Add entries to a trust file for the CLI.
 * @fpath: Path to the trust fragment that should be extended.
 * @list:  List of paths prepared by the CLI for insertion.
 *
 * The CLI populates @list with path indexes and this helper computes the
 * hash/size payloads before merging the new entries into @fpath.  Paths
 * that cannot be measured are dropped from @list (make_data_string logs
 * the reason) so that no entry without data reaches the file.
 *
 * Returns 0 when the update succeeds and 1 when the existing file could
 * not be loaded completely or the rewritten file could not be saved.
 * A missing @fpath is not an error; it is created.
 */
int trust_file_append(const char *fpath, list_t *list)
{
	list_t content;
	list_init(&content);
	int rc = trust_file_load(fpath, &content, -1);
	// if trust file does not exist (rc == 1), we ignore it as it will
	// be created while writing
	if (rc == 2 || rc == 3) {
		// A parse error or out-of-memory leaves only part of the
		// file in memory.  Rewriting from that would silently
		// remove entries, so bail out and release what was loaded.
		list_empty(&content);
		return 1;
	}

	list_item_t *prev = NULL;
	list_item_t *lptr = list->first;
	while (lptr) {
		list_item_t *next = lptr->next;

		lptr->data = make_data_string(lptr->index);
		if (lptr->data == NULL) {
			// Unlink the item; same bookkeeping as
			// trust_file_delete_path()
			if (prev)
				prev->next = next;
			else
				list->first = next;
			if (!next)
				list->last = prev;
			--list->count;
			list_destroy_item(&lptr);
		} else {
			prev = lptr;
		}
		lptr = next;
	}

	list_merge(&content, list);
	rc = write_out_list(&content, fpath);
	list_empty(&content);
	return rc ? 1 : 0;
}

#define DELIM  ' '
#define MAX_DELIMS 2 // Trustdb has 3 fields - therefore 2 delimiters
/*
 * parse_line_backwards - Split a trust-file line into its components.
 * @line: Buffer containing the raw line (modified in place).
 * @path: Output buffer for the stored path, at least 4097 bytes.
 * @size: Output parameter for the recorded size.
 * @sha:  Output buffer for the digest string.
 * @sha_size: Capacity of @sha in bytes, including the terminating NUL.
 *
 * The line is scanned from the end because the path may itself contain
 * spaces: the last two DELIM characters separate path, size and digest.
 * Trailing whitespace (including the newline) is removed first and the
 * two delimiters are overwritten with NUL, so @line is altered even when
 * parsing fails.
 *
 * The outputs are written only on success.  Returns 0 when parsing
 * succeeds or -1 when the line is malformed: fewer than three fields, a
 * size that is not a plain decimal number fitting in unsigned long, a
 * digest that does not fit in @sha, or a path longer than 4096 bytes.
 */
static int parse_line_backwards(char *line, char *path, unsigned long *size,
				 char *sha, size_t sha_size)
{
	// Longest path that fits the caller's 4097 byte buffer
	enum { PATH_FIELD_MAX = 4096 };

	if (line == NULL || path == NULL || size == NULL || sha == NULL ||
	    sha_size == 0)
		return -1;

	// Strip trailing whitespace; the cast keeps isspace() defined for
	// bytes >= 0x80 on platforms where char is signed
	size_t len = strlen(line);
	while (len > 0 && isspace((unsigned char)line[len - 1]))
		line[--len] = '\0';

	// Locate the last two delimiters, walking towards the start
	int count = 0;
	char *delims[MAX_DELIMS] = {0};
	for (size_t i = len; i > 0 && count < MAX_DELIMS; i--) {
		if (line[i - 1] == DELIM)
			delims[count++] = &line[i - 1];
	}

	if (count != MAX_DELIMS)
		return -1;

	// Terminate each field in place so they can be used as C strings
	for (int i = 0 ; i < count ; i++) {
		*(delims[i]) = '\0';
	}

	const char *sha_field = delims[0] + 1;
	const char *size_field = delims[1] + 1;

	// A digest that does not fit is malformed, not something to clip
	size_t sha_len = strlen(sha_field);
	if (sha_len >= sha_size)
		return -1;

	// Requiring a leading digit rejects empty fields, signs and
	// whitespace that strtoul() would otherwise accept
	if (!isdigit((unsigned char)size_field[0]))
		return -1;

	char *endptr;
	errno = 0;
	unsigned long value = strtoul(size_field, &endptr, 10);
	if (errno == ERANGE || *endptr != '\0')
		return -1;

	// Truncating a path would make the entry refer to another file
	size_t path_len = (size_t)(delims[1] - line);
	if (path_len > PATH_FIELD_MAX)
		return -1;

	// Each field is NUL terminated, hence the + 1
	memcpy(sha, sha_field, sha_len + 1);
	*size = value;
	memcpy(path, line, path_len + 1);

	return 0;
}

/*
 * trust_file_load - Load a trust fragment into a list or memfd.
 * @fpath: Full path to the trust fragment.
 * @list:  Destination list when @memfd is negative.
 * @memfd: File descriptor used for streaming output, or -1 for lists.
 *
 * This helper is shared by the daemon and CLI.  Each non-comment line is
 * split into path, size and digest, validated, and then either appended
 * to @list (path as index, formatted data string as data) or written to
 * @memfd as "path data\n".  A path that occurs more than once in the
 * same file is reported and only its first occurrence is kept.
 *
 * It returns 0 on success, 1 when the file cannot be opened, 2 on parse
 * errors, and 3 when memory could not be allocated while tracking
 * duplicates.  For results 2 and 3 processing stops at the offending
 * line; entries handled before it have already been stored, so the
 * caller may be left with a partial list.
 */
int trust_file_load(const char *fpath, list_t *list, int memfd)
{
	char buffer[BUFFER_SIZE];
	int escaped = 0;
	long line = 0;
	int rc = 0;
	/* uthash table of paths already seen in this file */
	struct trust_seen_entry *seen = NULL;

	FILE *file = fopen(fpath, "r");
	if (!file)
		return 1;

	while (fgets(buffer, BUFFER_SIZE, file)) {
		char name[4097], sha[FILE_DIGEST_STRING_MAX], *index = NULL,
			*data = NULL;
		char data_buf[BUFFER_SIZE];
		unsigned long sz;
		unsigned int tsource = SRC_FILE_DB;

		line++;

		/*
		 * Skip comments and lines that start with a control
		 * character (this covers empty lines).  A legacy header on
		 * the first line means the paths in this file are escaped.
		 * NOTE(review): iscntrl() receives a plain char here; a
		 * negative value is outside what <ctype.h> functions accept.
		 */
		if (iscntrl(buffer[0]) || buffer[0] == '#') {
			if (line == 1 &&
			   strncmp(buffer, HEADER_OLD, strlen(HEADER_OLD)) == 0)
				escaped = 1;
			continue;
		}

		/*
		 * parse_line_backwards() edits buffer in place, so the
		 * warning below prints whatever is left of the line.
		 */
		if (parse_line_backwards(buffer, name, &sz, sha,
					 sizeof(sha))) {
			msg(LOG_WARNING, "Can't parse %s", buffer);
			rc = 2;
			goto out;
		}

		/*
		 * Infer the algorithm from the digest width instead of trusting
		 * the source.  The helpers in file.c keep the mapping between
		 * printable hex length and binary digest sizes in sync with
		 * upstream algorithm support.
		 */
		size_t digest_len = strlen(sha);
		file_hash_alg_t alg = file_hash_alg(digest_len);
		size_t expected_len = file_hash_length(alg) * 2;

		if (expected_len == 0 || digest_len != expected_len) {
			msg(LOG_WARNING, "Cannot infer digest algorithm for %s",
			    name);
			rc = 2;
			goto out;
		}

		/*
		 * Non-RPM trust fragments historically persisted SHA256
		 * digests only. RPM database ingestion is the only path
		 * that mirrors multiple upstream algorithms, so seeing
		 * anything but SHA256 here likely means the on-disk format
		 * has changed unexpectedly.
		 */
		if (alg != FILE_HASH_ALG_SHA256) {
			msg(LOG_WARNING,"Unsupported digest algorithm %s in %s",
			    file_hash_alg_name(alg), fpath);
			rc = 2;
			goto out;
		}

		/* Build the stored value: source, size and digest. */
		int len = snprintf(data_buf, sizeof(data_buf),
				   DATA_FORMAT, tsource, sz, sha);
		if (len < 0 || len >= (int)sizeof(data_buf)) {
			msg(LOG_ERR, "Entry too large in %s", fpath);
			continue;
		}


		/* If the legacy format was used, unescape the stored path. */
		index = escaped ? unescape(name) : strdup(name);
		if (index == NULL) {
			msg(LOG_ERR, "Could not unescape %s from %s", name, fpath);
			continue;
		}

		struct trust_seen_entry *entry;

		/* Only the first occurrence of a path in a file is kept. */
		HASH_FIND_STR(seen, index, entry);
		if (entry) {
			msg(LOG_WARNING, "%s contains a duplicate %s",
			    fpath, index);
			free(index);
			continue;
		}

		entry = malloc(sizeof(*entry));
		if (!entry) {
			msg(LOG_ERR, "Out of memory tracking %s", index);
			free(index);
			rc = 3;
			goto out;
		}

		/*
		 * The hash entry borrows index as its key.  In memfd mode
		 * the table is the only owner and frees it at "out"; in
		 * list mode the list item owns it.
		 */
		entry->path = index;

		if (memfd >= 0) {
			HASH_ADD_KEYPTR(hh, seen, entry->path,
					strlen(entry->path), entry);
			/* A failed write is logged and loading continues. */
			if (dprintf(memfd, "%s %s\n", index, data_buf) < 0)
				msg(LOG_ERR,
				    "dprintf failed writing %s to memfd (%s)",
				    index, strerror(errno));
		} else {
			data = strdup(data_buf);
			if (data == NULL) {
				free(index);
				free(entry);
				continue;
			}

			/* A non-zero list_append() result is a failure. */
			if (list_append(list, index, data)) {
				free(index);
				free(data);
				free(entry);
			} else // Add it after successfully stored on the list
				HASH_ADD_KEYPTR(hh, seen, entry->path,
						strlen(entry->path), entry);

		}
	}

out:
	fclose(file);

	struct trust_seen_entry *item;

	/*
	 * Tear down the duplicate tracker.  The key strings are released
	 * only in memfd mode; in list mode they belong to the list items.
	 */
	while (seen) {
		item = seen;
		HASH_DEL(seen, item);
		if (memfd >= 0)
			free((char *)item->path);
		free(item);
	}

	return rc;
}


/*
 * trust_file_delete_path - Remove matching entries from a trust file.
 * @fpath: Path to the trust fragment being edited.
 * @path:  Prefix that identifies entries scheduled for removal.
 *
 * Used only by the CLI trust management commands.  Every entry whose
 * stored path starts with @path is removed and, if anything matched,
 * the file is rewritten.
 *
 * Returns the number of entries deleted, 0 when the file could not be
 * opened (for reading or for rewriting), and -1 when the file could not
 * be loaded completely (parse error or out of memory).
 */
int trust_file_delete_path(const char *fpath, const char *path)
{
	list_t list;
	list_init(&list);
	int rc = trust_file_load(fpath, &list, -1);
	switch (rc) {
	case 0:
		break;
	case 1:
		msg(LOG_ERR, "Cannot open %s", fpath);
		return 0;
	default:
		// 2 (parse error) and 3 (out of memory) both leave a
		// partial list; rewriting from it would lose entries
		list_empty(&list);
		return -1;
	}

	int count = 0;
	size_t path_len = strlen(path);
	list_item_t *lptr = list.first, *prev = NULL, *tmp;

	while (lptr) {
		if (!strncmp(lptr->index, path, path_len)) {
			++count;
			tmp = lptr->next;

			// Unlink the item and keep first/last/count in sync
			if (prev)
				prev->next = lptr->next;
			else
				list.first = lptr->next;
			if (!lptr->next)
				list.last = prev;
			--list.count;
			list_destroy_item(&lptr);

			lptr = tmp;
			continue;
		}
		prev = lptr;
		lptr = lptr->next;
	}

	// Do not report deletions that never reached the disk
	if (count && write_out_list(&list, fpath)) {
		msg(LOG_ERR, "Failed to rewrite %s", fpath);
		count = 0;
	}

	list_empty(&list);
	return count;
}

/*
 * trust_file_update_path - Refresh hashes for matching entries.
 * @fpath: Trust fragment that should be rewritten.
 * @path:  Prefix designating entries that must be re-measured.
 * @use_filter: When true, entries that filter_check() does not allow
 *              are left untouched.
 *
 * Used only by the CLI trust management commands.  An entry whose file
 * can no longer be measured keeps its previous size and digest and is
 * not counted.
 *
 * Returns the number of entries updated, 0 when the file could not be
 * opened (for reading or for rewriting), and -1 when the existing file
 * could not be loaded completely (parse error or out of memory).
 */
int trust_file_update_path(const char *fpath, const char *path, bool use_filter)
{
	list_t list;
	list_init(&list);
	int rc = trust_file_load(fpath, &list, -1);
	switch (rc) {
	case 0:
		break;
	case 1:
		msg(LOG_ERR, "Cannot open %s", fpath);
		return 0;
	default:
		// 2 (parse error) and 3 (out of memory) both leave a
		// partial list; rewriting from it would lose entries
		list_empty(&list);
		return -1;
	}

	int count = 0;
	size_t path_len = strlen(path);

	for (list_item_t *lptr = list.first; lptr; lptr = lptr->next) {
		if (strncmp(lptr->index, path, path_len))
			continue;

		if (use_filter) {
			filter_rc_t f_res = filter_check(lptr->index);
			if (f_res != FILTER_ALLOW) {
				if (f_res == FILTER_ERR_DEPTH)
					msg(LOG_WARNING,
					    "filter nesting exceeds MAX_FILTER_DEPTH for %s; excluding",
					    (char *)lptr->index);
				continue;
			}
		}

		// Measure first: on failure the old data must stay in
		// place, otherwise a NULL payload reaches the writer
		char *fresh = make_data_string(lptr->index);
		if (fresh == NULL) {
			msg(LOG_WARNING,
			    "Cannot refresh %s; keeping the existing entry",
			    (char *)lptr->index);
			continue;
		}

		free((char *)lptr->data);
		lptr->data = fresh;
		++count;
	}

	// Do not report updates that never reached the disk
	if (count && write_out_list(&list, fpath)) {
		msg(LOG_ERR, "Failed to rewrite %s", fpath);
		count = 0;
	}

	list_empty(&list);
	return count;
}

/*
 * trust_file_rm_duplicates - Drop pending additions a file already has.
 * @fpath: Trust fragment whose entries are looked up.
 * @list:  Pending CLI additions; matching paths are removed from it.
 *
 * Used only by the CLI trust management commands before appending new
 * entries.  Each path stored in @fpath is removed from @list, stopping
 * early once @list is empty.  Returns 0 after pruning, or -1 when the
 * trust fragment could not be opened or parsed (@list is then left
 * unchanged).
 */
int trust_file_rm_duplicates(const char *fpath, list_t *list)
{
	list_t on_disk;
	list_init(&on_disk);

	int status = trust_file_load(fpath, &on_disk, -1);
	if (status == 1) {
		msg(LOG_ERR, "Cannot open %s", fpath);
		return -1;
	}
	if (status == 2) {
		// Release whatever was read before the bad line
		list_empty(&on_disk);
		return -1;
	}

	list_item_t *cur = on_disk.first;
	while (cur) {
		list_remove(list, cur->index);
		// Nothing left to prune
		if (list->count == 0)
			break;
		cur = cur->next;
	}

	list_empty(&on_disk);
	return 0;
}



/*
 * ftw_load - nftw callback that loads one trust fragment.
 * @fpath: Current entry discovered by nftw.
 * @sb:    (unused) file metadata supplied by nftw.
 * @typeflag: nftw entry type; only FTW_F (regular file) is processed.
 * @ftwbuf:   (unused) traversal context from nftw.
 *
 * Feeds @fpath to trust_file_load() using the file-scope _list and
 * _memfd.  The load result is not examined and the walk always
 * continues.
 */
static int ftw_load(const char *fpath,
		const struct stat *sb __attribute__ ((unused)),
		int typeflag,
		struct FTW *ftwbuf __attribute__ ((unused)))
{
	if (typeflag != FTW_F)
		return FTW_CONTINUE;

	trust_file_load(fpath, &_list, _memfd);
	return FTW_CONTINUE;
}

/*
 * ftw_delete_path - nftw callback that deletes matching entries.
 * @fpath: Current trust fragment examined by nftw.
 * @sb:    (unused) file metadata supplied by nftw.
 * @typeflag: nftw entry type; only FTW_F (regular file) is processed.
 * @ftwbuf:   (unused) traversal context from nftw.
 *
 * Removes entries matching the file-scope _path from @fpath and adds the
 * number removed to _count.  The walk always continues.
 */
static int ftw_delete_path(const char *fpath,
		const struct stat *sb __attribute__ ((unused)),
		int typeflag,
		struct FTW *ftwbuf __attribute__ ((unused)))
{
	if (typeflag == FTW_F) {
		int deleted = trust_file_delete_path(fpath, _path);

		// A negative result reports a fragment that could not be
		// parsed; it must not be subtracted from the total
		if (deleted > 0)
			_count += deleted;
	}
	return FTW_CONTINUE;
}

/*
 * ftw_update_path - nftw callback that updates matching entries.
 * @fpath: Current trust fragment examined by nftw.
 * @sb:    (unused) file metadata supplied by nftw.
 * @typeflag: nftw entry type; only FTW_F (regular file) is processed.
 * @ftwbuf:   (unused) traversal context from nftw.
 *
 * Re-measures entries matching the file-scope _path in @fpath, honouring
 * _use_filter, and adds the number refreshed to _count.  The walk always
 * continues.
 */
static int ftw_update_path(const char *fpath,
		const struct stat *sb __attribute__ ((unused)),
		int typeflag,
		struct FTW *ftwbuf __attribute__ ((unused)))
{
	if (typeflag == FTW_F) {
		int updated = trust_file_update_path(fpath, _path,
						     _use_filter);

		// A negative result reports a fragment that could not be
		// parsed; it must not be subtracted from the total
		if (updated > 0)
			_count += updated;
	}
	return FTW_CONTINUE;
}

/*
 * ftw_rm_duplicates - nftw callback pruning the pending CLI additions.
 * @fpath: Current trust fragment examined by nftw.
 * @sb:    (unused) file metadata supplied by nftw.
 * @typeflag: nftw entry type; only FTW_F (regular file) is processed.
 * @ftwbuf:   (unused) traversal context from nftw.
 *
 * Removes from the file-scope _list every path already stored in @fpath.
 * Returns FTW_STOP as soon as _list is empty, since there is nothing
 * left to prune; otherwise FTW_CONTINUE.
 */
static int ftw_rm_duplicates(const char *fpath,
		const struct stat *sb __attribute__ ((unused)),
		int typeflag,
		struct FTW *ftwbuf __attribute__ ((unused)))
{
	if (_list.count == 0)
		return FTW_STOP;

	if (typeflag != FTW_F)
		return FTW_CONTINUE;

	trust_file_rm_duplicates(fpath, &_list);
	return FTW_CONTINUE;
}



/*
 * trust_file_load_all - Load the main trust file and every fragment.
 * @list:  Receives the loaded entries when @memfd is negative; may be
 *         NULL, in which case nothing is handed over.
 * @memfd: File descriptor that receives streamed entries, or -1.
 *
 * Used by both the daemon and CLI.  TRUST_FILE_PATH is loaded first and
 * then every regular file found under TRUST_DIR_PATH.  The file-scope
 * _list serves as scratch storage and _memfd carries @memfd to the nftw
 * callback; _memfd is reset to -1 before returning.
 */
void trust_file_load_all(list_t *list, int memfd)
{
	const bool streaming = memfd >= 0;

	// Start from a clean accumulator
	list_empty(&_list);
	_memfd = memfd;

	trust_file_load(TRUST_FILE_PATH, &_list, memfd);
	nftw(TRUST_DIR_PATH, &ftw_load, FTW_NOPENFD, FTW_FLAGS);

	if (streaming)
		list_empty(&_list);
	else if (list)
		list_merge(list, &_list);

	_memfd = -1;
}

/*
 * trust_file_delete_path_all - Delete matching entries across all files.
 * @path: Prefix designating entries to remove.
 *
 * Used only by the CLI trust management commands to remove a path from
 * TRUST_FILE_PATH and from every fragment under TRUST_DIR_PATH.  Returns
 * the accumulated number of entries deleted; the result for
 * TRUST_FILE_PATH seeds that total, so it is -1 based when that file
 * cannot be parsed.  If the working copy of @path cannot be allocated
 * the fragment directory is skipped and an error is logged.
 */
int trust_file_delete_path_all(const char *path)
{
	_count = trust_file_delete_path(TRUST_FILE_PATH, path);

	// The nftw callback receives the prefix through _path
	_path = strdup(path);
	if (_path == NULL) {
		msg(LOG_ERR, "Out of memory; %s not searched for %s",
		    TRUST_DIR_PATH, path);
		return _count;
	}

	nftw(TRUST_DIR_PATH, &ftw_delete_path, FTW_NOPENFD, FTW_FLAGS);

	free(_path);
	_path = NULL;	// do not leave a dangling pointer behind
	return _count;
}

/*
 * trust_file_update_path_all - Refresh hashes across every trust file.
 * @path: Prefix designating entries that must be re-measured.
 * @use_filter: Forwarded to trust_file_update_path() for every file.
 *
 * Used only by the CLI trust management commands.  Processes
 * TRUST_FILE_PATH and then every fragment under TRUST_DIR_PATH.  Returns
 * the accumulated number of entries updated; the result for
 * TRUST_FILE_PATH seeds that total, so it is -1 based when that file
 * cannot be parsed.  If the working copy of @path cannot be allocated
 * the fragment directory is skipped and an error is logged.
 */
int trust_file_update_path_all(const char *path, bool use_filter)
{
	_use_filter = use_filter;
	_count = trust_file_update_path(TRUST_FILE_PATH, path, use_filter);

	// The nftw callback receives the prefix through _path
	_path = strdup(path);
	if (_path == NULL) {
		msg(LOG_ERR, "Out of memory; %s not searched for %s",
		    TRUST_DIR_PATH, path);
	} else {
		nftw(TRUST_DIR_PATH, &ftw_update_path, FTW_NOPENFD,
		     FTW_FLAGS);
		free(_path);
		_path = NULL;	// do not leave a dangling pointer behind
	}

	_use_filter = false;
	return _count;
}

/*
 * trust_file_rm_duplicates_all - Remove duplicates across trust files.
 * @list: Pending CLI additions to prune before appending.
 *
 * Used only by the CLI trust management commands prior to calling
 * trust_file_append().  The pending entries are handed to the file-scope
 * _list so the nftw callback can reach them, pruned against
 * TRUST_FILE_PATH and every fragment under TRUST_DIR_PATH, and then
 * handed back to @list.
 */
void trust_file_rm_duplicates_all(list_t *list)
{
	/* Discard anything a previous operation left in the scratch list. */
	list_empty(&_list);
	/*
	 * NOTE(review): presumably list_merge() moves the items of its
	 * second argument into the first - confirm in llist.c.
	 */
	list_merge(&_list, list);
	trust_file_rm_duplicates(TRUST_FILE_PATH, &_list);
	/* The callback stops the walk once _list becomes empty. */
	nftw(TRUST_DIR_PATH, &ftw_rm_duplicates, FTW_NOPENFD, FTW_FLAGS);
	/* Give the surviving entries back to the caller. */
	list_merge(list, &_list);
}
