[PATCH] lockfile: Keep lockfile owned by root
by Nir Soffer
From: Nir Soffer <nsoffer(a)redhat.com>
On Fedora 28, sanlock fails to create the lockfile before dropping
privileges, because /run/sanlock is owned by sanlock, and selinux
disables DAC_OVERRIDE.
To allow root to create the lockfile before dropping privileges,
/run/sanlock is owned by group root, and group writable. Since sanlock
never writes to the lockfile after dropping privileges, keep the lockfile
owned by root.
Here are /run/sanlock permissions with this change:
$ ls -lhdZ /run/sanlock
drwxrwxr-x. 2 sanlock root system_u:object_r:sanlock_var_run_t:s0 80 Nov 29 23:07 /run/sanlock
$ ls -lhZ /run/sanlock
total 4.0K
-rw-r--r--. 1 root root system_u:object_r:sanlock_var_run_t:s0 5 Nov 29 23:07 sanlock.pid
srw-rw----. 1 sanlock sanlock system_u:object_r:sanlock_var_run_t:s0 0 Nov 29 23:07 sanlock.sock
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
src/lockfile.c | 12 ++++--------
src/main.c | 6 +++++-
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/src/lockfile.c b/src/lockfile.c
index 5a2518e..cffaaff 100644
--- a/src/lockfile.c
+++ b/src/lockfile.c
@@ -19,41 +19,44 @@
#include <time.h>
#include <syslog.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>
#include "sanlock_internal.h"
#include "log.h"
#include "lockfile.h"
int lockfile(const char *dir, const char *name, int uid, int gid)
{
char path[PATH_MAX];
char buf[16];
struct flock lock;
mode_t old_umask;
int fd, rv;
- old_umask = umask(0022);
+ /* Make rundir group writable, allowing creation of the lockfile when
+ * starting as root. */
+
+ old_umask = umask(0002);
rv = mkdir(dir, 0775);
if (rv < 0 && errno != EEXIST) {
umask(old_umask);
return rv;
}
umask(old_umask);
rv = chown(dir, uid, gid);
if (rv < 0) {
log_error("lockfile chown error %s: %s",
dir, strerror(errno));
return rv;
}
snprintf(path, PATH_MAX, "%s/%s", dir, name);
fd = open(path, O_CREAT|O_WRONLY|O_CLOEXEC, 0644);
if (fd < 0) {
log_error("lockfile open error %s: %s",
path, strerror(errno));
@@ -72,32 +75,25 @@ int lockfile(const char *dir, const char *name, int uid, int gid)
goto fail;
}
rv = ftruncate(fd, 0);
if (rv < 0) {
log_error("lockfile truncate error %s: %s",
path, strerror(errno));
goto fail;
}
memset(buf, 0, sizeof(buf));
snprintf(buf, sizeof(buf), "%d\n", getpid());
rv = write(fd, buf, strlen(buf));
if (rv <= 0) {
log_error("lockfile write error %s: %s",
path, strerror(errno));
goto fail;
}
- rv = fchown(fd, uid, gid);
- if (rv < 0) {
- log_error("lockfile fchown error %s: %s",
- path, strerror(errno));
- goto fail;
- }
-
return fd;
fail:
close(fd);
return -1;
}
diff --git a/src/main.c b/src/main.c
index b3898e2..9538cc5 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1665,42 +1665,46 @@ static int do_daemon(void)
setup_task_aio(&main_task, com.aio_arg, 0);
rv = client_alloc();
if (rv < 0)
return rv;
helper_ci = client_add(helper_status_fd, process_helper, helper_dead);
if (helper_ci < 0)
return rv;
strcpy(client[helper_ci].owner_name, "helper");
setup_signals();
setup_logging();
if (strcmp(run_dir, DEFAULT_RUN_DIR))
log_warn("Using non-standard run directory '%s'", run_dir);
if (!privileged)
log_warn("Running in unprivileged mode");
+ /* If we run as root, make run_dir owned by root, so we can create the
+ * lockfile when selinux disables DAC_OVERRIDE.
+ * See https://danwalsh.livejournal.com/79643.html */
- fd = lockfile(run_dir, SANLK_LOCKFILE_NAME, com.uid, com.gid);
+ fd = lockfile(run_dir, SANLK_LOCKFILE_NAME, com.uid,
+ privileged ? 0 : com.gid);
if (fd < 0) {
close_logging();
return fd;
}
setup_host_name();
setup_uid_gid();
log_warn("sanlock daemon started %s host %s", VERSION, our_host_name_global);
setup_priority();
rv = thread_pool_create(DEFAULT_MIN_WORKER_THREADS, com.max_worker_threads);
if (rv < 0)
goto out;
rv = setup_listener();
if (rv < 0)
goto out_threads;
--
2.17.2
4 years, 10 months
[sanlock] branch master updated: sanlock: use default
max_sectors_kb 1024
by git repository hosting
This is an automated email from the git hooks/post-receive script.
teigland pushed a commit to branch master
in repository sanlock.
The following commit(s) were added to refs/heads/master by this push:
new bf8058b sanlock: use default max_sectors_kb 1024
bf8058b is described below
commit bf8058be781e23a8d2c2b99581faaf46a8d258b5
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Tue Nov 27 12:32:40 2018 -0600
sanlock: use default max_sectors_kb 1024
By default set max_sectors_kb to perform 1 MiB read ios.
---
src/cmd.c | 6 ++++++
src/main.c | 3 +++
src/sanlock_internal.h | 6 +++---
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/src/cmd.c b/src/cmd.c
index 40079af..d816fd7 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -2260,6 +2260,9 @@ static int print_state_daemon(char *str)
"gid=%d "
"uid=%d "
"sh_retries=%d "
+ "max_sectors_kb_ignore=%d "
+ "max_sectors_kb_align=%d "
+ "max_sectors_kb_num=%d "
"use_aio=%d "
"kill_grace_seconds=%d "
"helper_pid=%d "
@@ -2281,6 +2284,9 @@ static int print_state_daemon(char *str)
com.gid,
com.uid,
com.sh_retries,
+ com.max_sectors_kb_ignore,
+ com.max_sectors_kb_align,
+ com.max_sectors_kb_num,
main_task.use_aio,
kill_grace_seconds,
helper_pid,
diff --git a/src/main.c b/src/main.c
index 6cbc7bf..b3898e2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3669,6 +3669,9 @@ int main(int argc, char *argv[])
com.renewal_read_extend_sec = 0;
com.renewal_history_size = DEFAULT_RENEWAL_HISTORY_SIZE;
com.paxos_debug_all = 0;
+ com.max_sectors_kb_ignore = DEFAULT_MAX_SECTORS_KB_IGNORE;
+ com.max_sectors_kb_align = DEFAULT_MAX_SECTORS_KB_ALIGN;
+ com.max_sectors_kb_num = DEFAULT_MAX_SECTORS_KB_NUM;
if (getgrnam("sanlock") && getpwnam("sanlock")) {
com.uname = (char *)"sanlock";
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index ebf9946..3a09b62 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -330,9 +330,9 @@ EXTERN struct client *client;
#define DEFAULT_QUIET_FAIL 1
#define DEFAULT_RENEWAL_HISTORY_SIZE 180 /* about 1 hour with 20 sec renewal interval */
-#define DEFAULT_MAX_SECTORS_KB_IGNORE 1 /* don't change it */
-#define DEFAULT_MAX_SECTORS_KB_ALIGN 0 /* set it to align size */
-#define DEFAULT_MAX_SECTORS_KB_NUM 0 /* set it to num KB for all lockspaces */
+#define DEFAULT_MAX_SECTORS_KB_IGNORE 0 /* don't change it */
+#define DEFAULT_MAX_SECTORS_KB_ALIGN 0 /* set it to align size */
+#define DEFAULT_MAX_SECTORS_KB_NUM 1024 /* set it to num KB for all lockspaces */
struct command_line {
int type; /* COM_ */
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
4 years, 10 months
[sanlock] branch master updated: tox: Make it easy to run the tests
by git repository hosting
This is an automated email from the git hooks/post-receive script.
teigland pushed a commit to branch master
in repository sanlock.
The following commit(s) were added to refs/heads/master by this push:
new 7cab627 tox: Make it easy to run the tests
7cab627 is described below
commit 7cab6271642286fc3bb65cd3d259aa0e2c777cda
Author: Nir Soffer <nsoffer(a)redhat.com>
AuthorDate: Mon Nov 26 02:11:20 2018 +0200
tox: Make it easy to run the tests
tox makes it easy to run python tests with multiple versions of python.
To run all python tests:
$ tox
This command:
- sets environment variables for the test, without modifying your shell
- creates python virtual environment for python 2.7 and python 3.6 if
needed.
- installs the test dependencies in the virtual environment without
modifying system python.
- builds wdmd, sanlock and the python module
- runs the tests with both python versions
Note: python 3.6 fails now because the python module does not compile
yet on python 3.
To run only the python 2.7 tests:
$ tox -e py27
README.dev was simplified; there is no need now to install pytest,
run make, or set environment variables.
pytest.ini was merged into tox.ini.
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
README.dev | 27 +++++++++++++++------------
pytest.ini | 7 -------
tox.ini | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 49 insertions(+), 19 deletions(-)
diff --git a/README.dev b/README.dev
index 04fbb02..3519993 100644
--- a/README.dev
+++ b/README.dev
@@ -1,23 +1,26 @@
How to test sanlock
===================
-To run the python based tests, you need pytest. The best way to install a
+To run the python based tests, you need tox. The best way to install a
recent version is to use pip:
- $ pip install pytest
+ $ pip install tox
-Before running the tests, you need to build wdmd, sanlock, and sanlock
-python bindings:
+To run the tests with python 2.7 and 3.6:
- $ make -C wdmd
- $ make -C src
- $ make -C python inplace
+ $ tox
-Setup the environment for testing sanlock running sanlcok from source as
-current user:
+Note: python 3.6 tests will fail now, since sanlock extension module needs
+changes to compile on python 3.
- $ source tests/env.sh
+To run only python 2.7:
-To run the tests:
+ $ tox -e py27
- $ pytest
+To run only test from some modules:
+
+ $ tox tests/daemon_test.py
+
+To run only tests matching the substring "foo":
+
+ $ tox -- -k foo
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index bb17bd5..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-[pytest]
-# Notes:
-# --basetemp: we must use /var/tmp as sanlock uses direct I/O.
-# -vv: increasing verbosify twice shows more detailed failures tracebacks.
-# -rxs: show extra test summary: (s)skipped, (x)failed
-# --durations: show slowest test duration
-addopts = -rxs -vv --basetemp=/var/tmp/sanlock --durations=10
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..ae9c8a7
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,34 @@
+# Tox (http://tox.testrun.org/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+envlist = py27,py36
+skipsdist = True
+skip_missing_interpreters = True
+
+[testenv]
+passenv = USER
+setenv =
+ LD_LIBRARY_PATH={env:PWD}/wdmd:{env:PWD}/src
+ SANLOCK_PRIVILEGED=0
+ SANLOCK_RUN_DIR=/tmp/sanlock
+ PYTHONPATH={env:PWD}/python
+sitepackages = True
+whitelist_externals = make
+deps =
+ pytest==4.0
+commands =
+ make -C wdmd
+ make -C src
+ make -C python inplace
+ pytest {posargs}
+
+[pytest]
+# Notes:
+# --basetemp: we must use /var/tmp as sanlock uses direct I/O.
+# -vv: increasing verbosify twice shows more detailed failures tracebacks.
+# -rxs: show extra test summary: (s)skipped, (x)failed
+# --durations: show slowest test duration
+addopts = -rxs -vv --basetemp=/var/tmp/sanlock --durations=10
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
4 years, 10 months
[PATCH] tox: Make it easy to run the tests
by Nir Soffer
From: Nir Soffer <nsoffer(a)redhat.com>
tox makes it easy to run python tests with multiple versions of python.
To run all python tests:
$ tox
This command:
- sets environment variables for the test, without modifying your shell
- creates python virtual environment for python 2.7 and python 3.6 if
needed.
- installs the test dependencies in the virtual environment without
modifying system python.
- builds wdmd, sanlock and the python module
- runs the tests with both python versions
Note: python 3.6 fails now because the python module does not compile
yet on python 3.
To run only the python 2.7 tests:
$ tox -e py27
README.dev was simplified; there is no need now to install pytest,
run make, or set environment variables.
pytest.ini was merged into tox.ini.
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
README.dev | 27 +++++++++++++++------------
pytest.ini | 7 -------
tox.ini | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 49 insertions(+), 19 deletions(-)
delete mode 100644 pytest.ini
create mode 100644 tox.ini
diff --git a/README.dev b/README.dev
index 04fbb02..3519993 100644
--- a/README.dev
+++ b/README.dev
@@ -1,23 +1,26 @@
How to test sanlock
===================
-To run the python based tests, you need pytest. The best way to install a
+To run the python based tests, you need tox. The best way to install a
recent version is to use pip:
- $ pip install pytest
+ $ pip install tox
-Before running the tests, you need to build wdmd, sanlock, and sanlock
-python bindings:
+To run the tests with python 2.7 and 3.6:
- $ make -C wdmd
- $ make -C src
- $ make -C python inplace
+ $ tox
-Setup the environment for testing sanlock running sanlcok from source as
-current user:
+Note: python 3.6 tests will fail now, since sanlock extension module needs
+changes to compile on python 3.
- $ source tests/env.sh
+To run only python 2.7:
-To run the tests:
+ $ tox -e py27
- $ pytest
+To run only test from some modules:
+
+ $ tox tests/daemon_test.py
+
+To run only tests matching the substring "foo":
+
+ $ tox -- -k foo
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index bb17bd5..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-[pytest]
-# Notes:
-# --basetemp: we must use /var/tmp as sanlock uses direct I/O.
-# -vv: increasing verbosify twice shows more detailed failures tracebacks.
-# -rxs: show extra test summary: (s)skipped, (x)failed
-# --durations: show slowest test duration
-addopts = -rxs -vv --basetemp=/var/tmp/sanlock --durations=10
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..ae9c8a7
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,34 @@
+# Tox (http://tox.testrun.org/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+envlist = py27,py36
+skipsdist = True
+skip_missing_interpreters = True
+
+[testenv]
+passenv = USER
+setenv =
+ LD_LIBRARY_PATH={env:PWD}/wdmd:{env:PWD}/src
+ SANLOCK_PRIVILEGED=0
+ SANLOCK_RUN_DIR=/tmp/sanlock
+ PYTHONPATH={env:PWD}/python
+sitepackages = True
+whitelist_externals = make
+deps =
+ pytest==4.0
+commands =
+ make -C wdmd
+ make -C src
+ make -C python inplace
+ pytest {posargs}
+
+[pytest]
+# Notes:
+# --basetemp: we must use /var/tmp as sanlock uses direct I/O.
+# -vv: increasing verbosify twice shows more detailed failures tracebacks.
+# -rxs: show extra test summary: (s)skipped, (x)failed
+# --durations: show slowest test duration
+addopts = -rxs -vv --basetemp=/var/tmp/sanlock --durations=10
--
2.17.2
4 years, 10 months
[sanlock] branch master updated (05cb313 -> 2cc91bc)
by git repository hosting
This is an automated email from the git hooks/post-receive script.
teigland pushed a change to branch master
in repository sanlock.
from 05cb313 sanlock: allow setting max_sectors_kb
new 3981ad5 sanlock: remove posix aio code
new 2cc91bc sanlock: improve non-aio usage
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
src/diskio.c | 252 +++++++++++++++++++++++++++++++----------------------------
src/main.c | 4 +
2 files changed, 136 insertions(+), 120 deletions(-)
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
4 years, 10 months
[sanlock] branch master updated: sanlock: allow setting
max_sectors_kb
by git repository hosting
This is an automated email from the git hooks/post-receive script.
teigland pushed a commit to branch master
in repository sanlock.
The following commit(s) were added to refs/heads/master by this push:
new 05cb313 sanlock: allow setting max_sectors_kb
05cb313 is described below
commit 05cb3135589742cec05e41563eae54fb26079656
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Wed Nov 7 14:17:31 2018 -0600
sanlock: allow setting max_sectors_kb
Allow sanlock to set this sysfs file:
/sys/dev/block/<major>:<minor>/queue/max_sectors_kb
to optimize read io's sent to a storage device. By default
this value is 512K which means each 1M read is split into
two 512K reads that are sent to storage. If set to 1024,
then each 1M read is sent to storage without being split.
The sanlock behavior is controlled by the max_sectors_kb
setting in sanlock.conf:
max_sectors_kb = ignore
sanlock doesn't set it (the current default).
max_sectors_kb = align
sanlock will set it to the lockspace align size (1M/2M/4M/8M)
when adding the lockspace.
max_sectors_kb = <num_kb>
sanlock will set it to <num_kb> for each lockspace.
---
src/diskio.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++
src/diskio.h | 4 ++
src/lockspace.c | 76 ++++++++++++++++++++++++++++++
src/main.c | 25 ++++++++++
src/sanlock.8 | 9 ++++
src/sanlock.conf | 3 ++
src/sanlock_internal.h | 7 +++
7 files changed, 247 insertions(+)
diff --git a/src/diskio.c b/src/diskio.c
index 8aa654d..83b0b1e 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -21,16 +21,138 @@
#include <sys/types.h>
#include <sys/time.h>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <blkid/blkid.h>
#include <libaio.h> /* linux aio */
#include <aio.h> /* posix aio */
+#include <aio.h> /* posix aio */
#include "sanlock_internal.h"
#include "diskio.h"
#include "direct.h"
#include "log.h"
+int read_sysfs_size(const char *disk_path, const char *name, unsigned int *val)
+{
+ char path[PATH_MAX];
+ char buf[32];
+ struct stat st;
+ int major, minor;
+ size_t len;
+ int fd;
+ int rv = -1;
+
+ rv = stat(disk_path, &st);
+ if (rv < 0)
+ return -1;
+
+ major = (int)major(st.st_rdev);
+ minor = (int)minor(st.st_rdev);
+
+ snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/queue/%s", major, minor, name);
+
+ fd = open(path, O_RDONLY, 0);
+ if (fd < 0)
+ return -1;
+
+ rv = read(fd, buf, sizeof(buf));
+ if (rv < 0) {
+ close(fd);
+ return -1;
+ }
+
+ if ((len = strlen(buf)) && buf[len - 1] == '\n')
+ buf[--len] = '\0';
+
+ if (strlen(buf)) {
+ *val = atoi(buf);
+ rv = 0;
+ }
+
+ close(fd);
+ return rv;
+}
+
+static int write_sysfs_size(const char *disk_path, const char *name, unsigned int val)
+{
+ char path[PATH_MAX];
+ char buf[32];
+ struct stat st;
+ int major, minor;
+ int fd;
+ int rv;
+
+ rv = stat(disk_path, &st);
+ if (rv < 0) {
+ log_debug("write_sysfs_size stat error %d %s", errno, disk_path);
+ return -1;
+ }
+
+ major = (int)major(st.st_rdev);
+ minor = (int)minor(st.st_rdev);
+
+ snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/queue/%s", major, minor, name);
+
+ memset(buf, 0, sizeof(buf));
+ snprintf(buf, sizeof(buf), "%u", val);
+
+ fd = open(path, O_RDWR, 0);
+ if (fd < 0) {
+ log_debug("write_sysfs_size open error %d %s", errno, path);
+ return -1;
+ }
+
+ rv = write(fd, buf, strlen(buf));
+ if (rv < 0) {
+ log_debug("write_sysfs_size write %s error %d %s", buf, errno, path);
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+ return 0;
+}
+
+/*
+ * The default max_sectors_kb is 512 (KB), so a 1MB read is split into two
+ * 512KB reads. Adjust this to at least do 1MB io's.
+ */
+
+int set_max_sectors_kb(struct sync_disk *disk, uint32_t set_kb)
+{
+ unsigned int max_kb = 0;
+ int rv;
+
+ rv = read_sysfs_size(disk->path, "max_sectors_kb", &max_kb);
+ if (rv < 0) {
+ log_debug("set_max_sectors_kb read error %d %s", rv, disk->path);
+ return rv;
+ }
+
+ if (max_kb == set_kb)
+ return 0;
+
+ rv = write_sysfs_size(disk->path, "max_sectors_kb", set_kb);
+ if (rv < 0) {
+ log_debug("set_max_sectors_kb write %u error %d %s", set_kb, rv, disk->path);
+ return rv;
+ }
+
+ return 0;
+}
+
+int get_max_sectors_kb(struct sync_disk *disk, uint32_t *max_sectors_kb)
+{
+ unsigned int max = 0;
+ int rv;
+
+ rv = read_sysfs_size(disk->path, "max_sectors_kb", &max);
+ if (!rv)
+ *max_sectors_kb = max;
+ return rv;
+}
+
static int set_disk_properties(struct sync_disk *disk)
{
blkid_probe probe;
@@ -47,6 +169,7 @@ static int set_disk_properties(struct sync_disk *disk)
blkid_free_probe(probe);
disk->sector_size = sector_size;
+
return 0;
}
diff --git a/src/diskio.h b/src/diskio.h
index 772f743..1ed65d2 100644
--- a/src/diskio.h
+++ b/src/diskio.h
@@ -17,6 +17,10 @@ int open_disks(struct sync_disk *disks, int num_disks);
int open_disks_fd(struct sync_disk *disks, int num_disks);
int majority_disks(int num_disks, int num);
+int read_sysfs_size(const char *path, const char *name, unsigned int *val);
+int set_max_sectors_kb(struct sync_disk *disk, uint32_t max_sectors_kb);
+int get_max_sectors_kb(struct sync_disk *disk, uint32_t *max_sectors_kb);
+
/*
* iobuf functions require the caller to allocate iobuf using posix_memalign
* and pass it into the function
diff --git a/src/lockspace.c b/src/lockspace.c
index dbc999e..41f96cb 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -23,6 +23,7 @@
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/un.h>
+#include <sys/stat.h>
#include "sanlock_internal.h"
#include "sanlock_admin.h"
@@ -580,6 +581,79 @@ static void save_renewal_history(struct space *sp, int delta_result,
}
}
+#define ONE_MB_IN_BYTES 1048576
+#define ONE_MB_IN_KB 1024
+
+static void set_lockspace_max_sectors_kb(struct space *sp, int sector_size, int align_size)
+{
+ struct stat st;
+ int align_size_kb = align_size / 1024; /* align_size is in bytes */
+ unsigned int hw_kb = 0;
+ unsigned int set_kb = 0;
+ int rv;
+
+ if (fstat(sp->host_id_disk.fd, &st) < 0) {
+ log_erros(sp, "set_lockspace_max_sectors_kb fstat error %d", errno);
+ return;
+ }
+
+ /* file not device */
+ if (S_ISREG(st.st_mode))
+ return;
+
+ if (com.max_sectors_kb_ignore)
+ return;
+ else if (com.max_sectors_kb_align)
+ set_kb = align_size_kb;
+ else if (com.max_sectors_kb_num)
+ set_kb = com.max_sectors_kb_num;
+ else
+ return;
+
+ rv = read_sysfs_size(sp->host_id_disk.path, "max_hw_sectors_kb", &hw_kb);
+ if (rv < 0 || !hw_kb) {
+ log_space(sp, "set_lockspace_max_sectors_kb max_hw_sectors_kb unknown %d %u", rv, hw_kb);
+ return;
+ }
+
+ if (hw_kb < set_kb) {
+ /*
+ * If the hardware won't support requested size, try setting 1MB.
+ */
+ if (hw_kb < ONE_MB_IN_KB) {
+ log_space(sp, "set_lockspace_max_sectors_kb small hw_kb %u req_kb %u", hw_kb, set_kb);
+ return;
+ }
+
+ if (set_kb < 1024) {
+ log_space(sp, "set_lockspace_max_sectors_kb small hw_kb %u small req_kb %u", hw_kb, set_kb);
+ return;
+ }
+
+ set_kb = ONE_MB_IN_KB;
+
+ log_space(sp, "set_lockspace_max_sectors_kb small hw_kb %u using 1024", hw_kb);
+
+ rv = set_max_sectors_kb(&sp->host_id_disk, set_kb);
+ if (rv < 0) {
+ log_space(sp, "set_lockspace_max_sectors_kb small hw_kb %u set 1024 error %d", hw_kb, rv);
+ return;
+ }
+ } else {
+ /*
+ * Tell the kernel to send hardware io's as large as the lease size.
+ */
+
+ log_space(sp, "set_lockspace_max_sectors_kb hw_kb %u setting %u", hw_kb, set_kb);
+
+ rv = set_max_sectors_kb(&sp->host_id_disk, set_kb);
+ if (rv < 0) {
+ log_space(sp, "set_lockspace_max_sectors_kb hw_kb %u set %u error %d", hw_kb, set_kb, rv);
+ return;
+ }
+ }
+}
+
/*
* This thread must not be stopped unless all pids that may be using any
* resources in it are dead/gone. (The USED flag in the lockspace represents
@@ -665,6 +739,8 @@ static void *lockspace_thread(void *arg_in)
sp->align_size = align_size;
sp->max_hosts = max_hosts;
+ set_lockspace_max_sectors_kb(sp, sector_size, align_size);
+
sp->lease_status.renewal_read_buf = malloc(sp->align_size);
if (!sp->lease_status.renewal_read_buf) {
acquire_result = -ENOMEM;
diff --git a/src/main.c b/src/main.c
index 1767328..e4514e7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -23,6 +23,7 @@
#include <sched.h>
#include <pwd.h>
#include <grp.h>
+#include <ctype.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <sys/wait.h>
@@ -2651,6 +2652,30 @@ static void read_config_file(void)
com.debug_io_submit = 1;
if (strstr(str, "complete"))
com.debug_io_complete = 1;
+
+ } else if (!strcmp(str, "max_sectors_kb")) {
+ memset(str, 0, sizeof(str));
+ get_val_str(line, str);
+ if (strstr(str, "ignore")) {
+ com.max_sectors_kb_ignore = 1;
+ com.max_sectors_kb_align = 0;
+ com.max_sectors_kb_num = 0;
+ } else if (strstr(str, "align")) {
+ com.max_sectors_kb_ignore = 0;
+ com.max_sectors_kb_align = 1;
+ com.max_sectors_kb_num = 0;
+ } else if (isdigit(str[0])) {
+ int num = atoi(str);
+ if (!num || (num % 2) || (num > 8192)) {
+ log_error("ignore invalid num max_sectors_kb %s", str);
+ } else {
+ com.max_sectors_kb_ignore = 0;
+ com.max_sectors_kb_align = 0;
+ com.max_sectors_kb_num = num;
+ }
+ } else {
+ log_error("ignore unknown max_sectors_kb %s", str);
+ }
}
}
diff --git a/src/sanlock.8 b/src/sanlock.8
index 7f5f4b7..0bc38ea 100644
--- a/src/sanlock.8
+++ b/src/sanlock.8
@@ -1333,6 +1333,15 @@ Add debug logging for each i/o. "submit" (no quotes) produces debug
output at submission time, "complete" produces debug output at completion
time, and "submit,complete" (no space) produces both.
+.IP \[bu] 2
+max_sectors_kb = <str>|<num>
+.br
+Set to "ignore" (no quotes) to prevent sanlock from checking or
+changing max_sectors_kb for the lockspace disk when starting a lockspace.
+Set to "align" (no quotes) to set max_sectors_kb for the lockspace disk
+to the align size of the lockspace.
+Set to a number to set a specific number of KB for all lockspace disks.
+
.SH SEE ALSO
.BR wdmd (8)
diff --git a/src/sanlock.conf b/src/sanlock.conf
index 9cd867f..7deecd2 100644
--- a/src/sanlock.conf
+++ b/src/sanlock.conf
@@ -52,3 +52,6 @@
#
# debug_io = <str>
# command line: n/a
+#
+# max_sectors_kb = <str>
+# command line: n/a
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 01b0123..ebf9946 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -330,6 +330,10 @@ EXTERN struct client *client;
#define DEFAULT_QUIET_FAIL 1
#define DEFAULT_RENEWAL_HISTORY_SIZE 180 /* about 1 hour with 20 sec renewal interval */
+#define DEFAULT_MAX_SECTORS_KB_IGNORE 1 /* don't change it */
+#define DEFAULT_MAX_SECTORS_KB_ALIGN 0 /* set it to align size */
+#define DEFAULT_MAX_SECTORS_KB_NUM 0 /* set it to num KB for all lockspaces */
+
struct command_line {
int type; /* COM_ */
int action; /* ACT_ */
@@ -338,6 +342,9 @@ struct command_line {
int debug_io_submit;
int debug_io_complete;
int paxos_debug_all;
+ int max_sectors_kb_ignore;
+ int max_sectors_kb_align;
+ int max_sectors_kb_num;
int quiet_fail;
int wait;
int use_watchdog;
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
4 years, 10 months