mirror of https://github.com/hak5/openwrt.git
kernel: Copy patches from kernel 4.14 to 4.19
This just copies the files from the kernel 4.14 specific folders into the kernel 4.19 specific folder; no changes are made to the files in this commit.
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
openwrt-19.07
parent
a272af75cd
commit
52a82ce3dd
|
@ -0,0 +1,30 @@
|
||||||
|
From 13b1ecc3401653a355798eb1dee10cc1608202f4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Mon, 18 Jan 2016 12:27:49 +0100
|
||||||
|
Subject: [PATCH 33/34] Kbuild: don't hardcode path to awk in
|
||||||
|
scripts/ld-version.sh
|
||||||
|
|
||||||
|
On some systems /usr/bin/awk does not exist, or is broken. Find it via
|
||||||
|
$PATH instead.
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
scripts/ld-version.sh | 4 +++-
|
||||||
|
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
--- a/scripts/ld-version.sh
|
||||||
|
+++ b/scripts/ld-version.sh
|
||||||
|
@@ -1,6 +1,7 @@
|
||||||
|
-#!/usr/bin/awk -f
|
||||||
|
+#!/bin/sh
|
||||||
|
# SPDX-License-Identifier: GPL-2.0
|
||||||
|
# extract linker version number from stdin and turn into single number
|
||||||
|
+exec awk '
|
||||||
|
{
|
||||||
|
gsub(".*\\)", "");
|
||||||
|
gsub(".*version ", "");
|
||||||
|
@@ -9,3 +10,4 @@
|
||||||
|
print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
|
||||||
|
exit
|
||||||
|
}
|
||||||
|
+'
|
|
@ -0,0 +1,23 @@
|
||||||
|
From 173019b66dcc9d68ad9333aa744dad1e369b5aa8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sun, 9 Jul 2017 00:26:53 +0200
|
||||||
|
Subject: [PATCH 34/34] kernel: add compile fix for linux 4.9 on x86
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
Makefile | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
--- a/Makefile
|
||||||
|
+++ b/Makefile
|
||||||
|
@@ -428,8 +428,8 @@ KBUILD_CFLAGS_MODULE := -DMODULE
|
||||||
|
KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
|
||||||
|
GCC_PLUGINS_CFLAGS :=
|
||||||
|
|
||||||
|
-export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
|
||||||
|
-export CPP AR NM STRIP OBJCOPY OBJDUMP HOSTLDFLAGS HOST_LOADLIBES
|
||||||
|
+export ARCH SRCARCH SUBARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD
|
||||||
|
+export CC CPP AR NM STRIP OBJCOPY OBJDUMP HOSTLDFLAGS HOST_LOADLIBES
|
||||||
|
export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
|
||||||
|
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
|
||||||
|
|
|
@ -0,0 +1,140 @@
|
||||||
|
From: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Date: Tue, 19 Jun 2018 13:14:56 -0700
|
||||||
|
Subject: [PATCH] kbuild: add macro for controlling warnings to
|
||||||
|
linux/compiler.h
|
||||||
|
|
||||||
|
I have occasionally run into a situation where it would make sense to
|
||||||
|
control a compiler warning from a source file rather than doing so from
|
||||||
|
a Makefile using the $(cc-disable-warning, ...) or $(cc-option, ...)
|
||||||
|
helpers.
|
||||||
|
|
||||||
|
The approach here is similar to what glibc uses, using __diag() and
|
||||||
|
related macros to encapsulate a _Pragma("GCC diagnostic ...") statement
|
||||||
|
that gets turned into the respective "#pragma GCC diagnostic ..." by
|
||||||
|
the preprocessor when the macro gets expanded.
|
||||||
|
|
||||||
|
Like glibc, I also have an argument to pass the affected compiler
|
||||||
|
version, but decided to actually evaluate that one. For now, this
|
||||||
|
supports GCC_4_6, GCC_4_7, GCC_4_8, GCC_4_9, GCC_5, GCC_6, GCC_7,
|
||||||
|
GCC_8 and GCC_9. Adding support for CLANG_5 and other interesting
|
||||||
|
versions is straightforward here. GNU compilers starting with gcc-4.2
|
||||||
|
could support it in principle, but "#pragma GCC diagnostic push"
|
||||||
|
was only added in gcc-4.6, so it seems simpler to not deal with those
|
||||||
|
at all. The same versions show a large number of warnings already,
|
||||||
|
so it seems easier to just leave it at that and not do a more
|
||||||
|
fine-grained control for them.
|
||||||
|
|
||||||
|
The use cases I found so far include:
|
||||||
|
|
||||||
|
- turning off the gcc-8 -Wattribute-alias warning inside of the
|
||||||
|
SYSCALL_DEFINEx() macro without having to do it globally.
|
||||||
|
|
||||||
|
- Reducing the build time for a simple re-make after a change,
|
||||||
|
once we move the warnings from ./Makefile and
|
||||||
|
./scripts/Makefile.extrawarn into linux/compiler.h
|
||||||
|
|
||||||
|
- More control over the warnings based on other configurations,
|
||||||
|
using preprocessor syntax instead of Makefile syntax. This should make
|
||||||
|
it easier for the average developer to understand and change things.
|
||||||
|
|
||||||
|
- Adding an easy way to turn the W=1 option on unconditionally
|
||||||
|
for a subdirectory or a specific file. This has been requested
|
||||||
|
by several developers in the past that want to have their subsystems
|
||||||
|
W=1 clean.
|
||||||
|
|
||||||
|
- Integrating clang better into the build systems. Clang supports
|
||||||
|
more warnings than GCC, and we probably want to classify them
|
||||||
|
as default, W=1, W=2 etc, but there are cases in which the
|
||||||
|
warnings should be classified differently due to excessive false
|
||||||
|
positives from one or the other compiler.
|
||||||
|
|
||||||
|
- Adding a way to turn the default warnings into errors (e.g. using
|
||||||
|
a new "make E=0" tag) while not also turning the W=1 warnings into
|
||||||
|
errors.
|
||||||
|
|
||||||
|
This patch for now just adds the minimal infrastructure in order to
|
||||||
|
do the first of the list above. As the #pragma GCC diagnostic
|
||||||
|
takes precedence over command line options, the next step would be
|
||||||
|
to convert a lot of the individual Makefiles that set nonstandard
|
||||||
|
options to use __diag() instead.
|
||||||
|
|
||||||
|
[paul.burton@mips.com:
|
||||||
|
- Rebase atop current master.
|
||||||
|
- Add __diag_GCC, or more generally __diag_<compiler>, abstraction to
|
||||||
|
avoid code outside of linux/compiler-gcc.h needing to duplicate
|
||||||
|
knowledge about different GCC versions.
|
||||||
|
- Add a comment argument to __diag_{ignore,warn,error} which isn't
|
||||||
|
used in the expansion of the macros but serves to push people to
|
||||||
|
document the reason for using them - per feedback from Kees Cook.
|
||||||
|
- Translate severity to GCC-specific pragmas in linux/compiler-gcc.h
|
||||||
|
rather than using GCC-specific pragmas in linux/compiler_types.h.
|
||||||
|
- Drop all but GCC 8 macros, since we only need to define macros for
|
||||||
|
versions that we need to introduce pragmas for, and as of this
|
||||||
|
series that's just GCC 8.
|
||||||
|
- Capitalize comments in linux/compiler-gcc.h to match the style of
|
||||||
|
the rest of the file.
|
||||||
|
- Line up macro definitions with tabs in linux/compiler-gcc.h.]
|
||||||
|
|
||||||
|
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Signed-off-by: Paul Burton <paul.burton@mips.com>
|
||||||
|
Tested-by: Christophe Leroy <christophe.leroy@c-s.fr>
|
||||||
|
Tested-by: Stafford Horne <shorne@gmail.com>
|
||||||
|
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/linux/compiler-gcc.h
|
||||||
|
+++ b/include/linux/compiler-gcc.h
|
||||||
|
@@ -358,3 +358,28 @@
|
||||||
|
* code
|
||||||
|
*/
|
||||||
|
#define uninitialized_var(x) x = x
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Turn individual warnings and errors on and off locally, depending
|
||||||
|
+ * on version.
|
||||||
|
+ */
|
||||||
|
+#define __diag_GCC(version, severity, s) \
|
||||||
|
+ __diag_GCC_ ## version(__diag_GCC_ ## severity s)
|
||||||
|
+
|
||||||
|
+/* Severity used in pragma directives */
|
||||||
|
+#define __diag_GCC_ignore ignored
|
||||||
|
+#define __diag_GCC_warn warning
|
||||||
|
+#define __diag_GCC_error error
|
||||||
|
+
|
||||||
|
+/* Compilers before gcc-4.6 do not understand "#pragma GCC diagnostic push" */
|
||||||
|
+#if GCC_VERSION >= 40600
|
||||||
|
+#define __diag_str1(s) #s
|
||||||
|
+#define __diag_str(s) __diag_str1(s)
|
||||||
|
+#define __diag(s) _Pragma(__diag_str(GCC diagnostic s))
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#if GCC_VERSION >= 80000
|
||||||
|
+#define __diag_GCC_8(s) __diag(s)
|
||||||
|
+#else
|
||||||
|
+#define __diag_GCC_8(s)
|
||||||
|
+#endif
|
||||||
|
--- a/include/linux/compiler_types.h
|
||||||
|
+++ b/include/linux/compiler_types.h
|
||||||
|
@@ -283,4 +283,22 @@ struct ftrace_likely_data {
|
||||||
|
# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#ifndef __diag
|
||||||
|
+#define __diag(string)
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef __diag_GCC
|
||||||
|
+#define __diag_GCC(version, severity, string)
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#define __diag_push() __diag(push)
|
||||||
|
+#define __diag_pop() __diag(pop)
|
||||||
|
+
|
||||||
|
+#define __diag_ignore(compiler, version, option, comment) \
|
||||||
|
+ __diag_ ## compiler(version, ignore, option)
|
||||||
|
+#define __diag_warn(compiler, version, option, comment) \
|
||||||
|
+ __diag_ ## compiler(version, warn, option)
|
||||||
|
+#define __diag_error(compiler, version, option, comment) \
|
||||||
|
+ __diag_ ## compiler(version, error, option)
|
||||||
|
+
|
||||||
|
#endif /* __LINUX_COMPILER_TYPES_H */
|
|
@ -0,0 +1,88 @@
|
||||||
|
From: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Date: Tue, 19 Jun 2018 13:14:57 -0700
|
||||||
|
Subject: [PATCH] disable -Wattribute-alias warning for SYSCALL_DEFINEx()
|
||||||
|
|
||||||
|
gcc-8 warns for every single definition of a system call entry
|
||||||
|
point, e.g.:
|
||||||
|
|
||||||
|
include/linux/compat.h:56:18: error: 'compat_sys_rt_sigprocmask' alias between functions of incompatible types 'long int(int, compat_sigset_t *, compat_sigset_t *, compat_size_t)' {aka 'long int(int, struct <anonymous> *, struct <anonymous> *, unsigned int)'} and 'long int(long int, long int, long int, long int)' [-Werror=attribute-alias]
|
||||||
|
asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\
|
||||||
|
^~~~~~~~~~
|
||||||
|
include/linux/compat.h:45:2: note: in expansion of macro 'COMPAT_SYSCALL_DEFINEx'
|
||||||
|
COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
|
||||||
|
^~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
kernel/signal.c:2601:1: note: in expansion of macro 'COMPAT_SYSCALL_DEFINE4'
|
||||||
|
COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset,
|
||||||
|
^~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
include/linux/compat.h:60:18: note: aliased declaration here
|
||||||
|
asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
|
||||||
|
^~~~~~~~~~
|
||||||
|
|
||||||
|
The new warning seems reasonable in principle, but it doesn't
|
||||||
|
help us here, since we rely on the type mismatch to sanitize the
|
||||||
|
system call arguments. After I reported this as GCC PR82435, a new
|
||||||
|
-Wno-attribute-alias option was added that could be used to turn the
|
||||||
|
warning off globally on the command line, but I'd prefer to do it a
|
||||||
|
little more fine-grained.
|
||||||
|
|
||||||
|
Interestingly, turning a warning off and on again inside of
|
||||||
|
a single macro doesn't always work; in this case I had to add
|
||||||
|
an extra statement in between and decided to copy the __SC_TEST
|
||||||
|
one from the native syscall to the compat syscall macro. See
|
||||||
|
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83256 for more details
|
||||||
|
about this.
|
||||||
|
|
||||||
|
[paul.burton@mips.com:
|
||||||
|
- Rebase atop current master.
|
||||||
|
- Split GCC & version arguments to __diag_ignore() in order to match
|
||||||
|
changes to the preceding patch.
|
||||||
|
- Add the comment argument to match the preceding patch.]
|
||||||
|
|
||||||
|
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82435
|
||||||
|
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Signed-off-by: Paul Burton <paul.burton@mips.com>
|
||||||
|
Tested-by: Christophe Leroy <christophe.leroy@c-s.fr>
|
||||||
|
Tested-by: Stafford Horne <shorne@gmail.com>
|
||||||
|
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/linux/compat.h
|
||||||
|
+++ b/include/linux/compat.h
|
||||||
|
@@ -48,6 +48,9 @@
|
||||||
|
COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
|
||||||
|
|
||||||
|
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
|
||||||
|
+ __diag_push(); \
|
||||||
|
+ __diag_ignore(GCC, 8, "-Wattribute-alias", \
|
||||||
|
+ "Type aliasing is used to sanitize syscall arguments");\
|
||||||
|
asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\
|
||||||
|
__attribute__((alias(__stringify(compat_SyS##name)))); \
|
||||||
|
static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
|
||||||
|
@@ -56,6 +59,7 @@
|
||||||
|
{ \
|
||||||
|
return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \
|
||||||
|
} \
|
||||||
|
+ __diag_pop(); \
|
||||||
|
static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
|
||||||
|
|
||||||
|
#ifndef compat_user_stack_pointer
|
||||||
|
--- a/include/linux/syscalls.h
|
||||||
|
+++ b/include/linux/syscalls.h
|
||||||
|
@@ -208,6 +208,9 @@ static inline int is_syscall_trace_event
|
||||||
|
|
||||||
|
#define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
|
||||||
|
#define __SYSCALL_DEFINEx(x, name, ...) \
|
||||||
|
+ __diag_push(); \
|
||||||
|
+ __diag_ignore(GCC, 8, "-Wattribute-alias", \
|
||||||
|
+ "Type aliasing is used to sanitize syscall arguments");\
|
||||||
|
asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
|
||||||
|
__attribute__((alias(__stringify(SyS##name)))); \
|
||||||
|
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
|
||||||
|
@@ -219,6 +222,7 @@ static inline int is_syscall_trace_event
|
||||||
|
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
|
||||||
|
return ret; \
|
||||||
|
} \
|
||||||
|
+ __diag_pop(); \
|
||||||
|
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
|
||||||
|
|
||||||
|
/*
|
|
@ -0,0 +1,558 @@
|
||||||
|
From 1bb0c3ec899827cfa4668bb63a08713a40744d21 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Sun, 9 Jul 2017 08:58:30 +0200
|
||||||
|
Subject: [PATCH] netfilter: conntrack: cache route for forwarded connections
|
||||||
|
|
||||||
|
... to avoid per-packet FIB lookup if possible.
|
||||||
|
|
||||||
|
The cached dst is re-used provided the input interface
|
||||||
|
is the same as that of the previous packet in the same direction.
|
||||||
|
|
||||||
|
If not, the cached dst is invalidated.
|
||||||
|
|
||||||
|
For ipv6 we also need to store sernum, else dst_check doesn't work,
|
||||||
|
pointed out by Eric Dumazet.
|
||||||
|
|
||||||
|
This should speed up forwarding when conntrack is already in use
|
||||||
|
anyway, especially when using reverse path filtering -- active RPF
|
||||||
|
enforces two FIB lookups for each packet.
|
||||||
|
|
||||||
|
Before the routing cache removal this didn't matter since RPF was performed
|
||||||
|
only when route cache didn't yield a result; but without route cache it
|
||||||
|
comes at a higher price.
|
||||||
|
|
||||||
|
Julian Anastasov suggested adding a NETDEV_UNREGISTER handler to
|
||||||
|
avoid holding on to dsts of 'frozen' conntracks.
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
---
|
||||||
|
include/net/netfilter/nf_conntrack_extend.h | 4 +
|
||||||
|
include/net/netfilter/nf_conntrack_rtcache.h | 34 +++
|
||||||
|
net/netfilter/Kconfig | 12 +
|
||||||
|
net/netfilter/Makefile | 3 +
|
||||||
|
net/netfilter/nf_conntrack_rtcache.c | 428 +++++++++++++++++++++++++++
|
||||||
|
5 files changed, 481 insertions(+)
|
||||||
|
create mode 100644 include/net/netfilter/nf_conntrack_rtcache.h
|
||||||
|
create mode 100644 net/netfilter/nf_conntrack_rtcache.c
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_conntrack_extend.h
|
||||||
|
+++ b/include/net/netfilter/nf_conntrack_extend.h
|
||||||
|
@@ -28,6 +28,9 @@ enum nf_ct_ext_id {
|
||||||
|
#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
|
||||||
|
NF_CT_EXT_SYNPROXY,
|
||||||
|
#endif
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
|
||||||
|
+ NF_CT_EXT_RTCACHE,
|
||||||
|
+#endif
|
||||||
|
NF_CT_EXT_NUM,
|
||||||
|
};
|
||||||
|
|
||||||
|
@@ -40,6 +43,7 @@ enum nf_ct_ext_id {
|
||||||
|
#define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
|
||||||
|
#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
|
||||||
|
#define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
|
||||||
|
+#define NF_CT_EXT_RTCACHE_TYPE struct nf_conn_rtcache
|
||||||
|
|
||||||
|
/* Extensions: optional stuff which isn't permanently in struct. */
|
||||||
|
struct nf_ct_ext {
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/include/net/netfilter/nf_conntrack_rtcache.h
|
||||||
|
@@ -0,0 +1,34 @@
|
||||||
|
+#include <linux/gfp.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_extend.h>
|
||||||
|
+
|
||||||
|
+struct dst_entry;
|
||||||
|
+
|
||||||
|
+struct nf_conn_dst_cache {
|
||||||
|
+ struct dst_entry *dst;
|
||||||
|
+ int iif;
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ u32 cookie;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct nf_conn_rtcache {
|
||||||
|
+ struct nf_conn_dst_cache cached_dst[IP_CT_DIR_MAX];
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static inline
|
||||||
|
+struct nf_conn_rtcache *nf_ct_rtcache_find(const struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
|
||||||
|
+ return nf_ct_ext_find(ct, NF_CT_EXT_RTCACHE);
|
||||||
|
+#else
|
||||||
|
+ return NULL;
|
||||||
|
+#endif
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline int nf_conn_rtcache_iif_get(const struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ return rtc->cached_dst[dir].iif;
|
||||||
|
+}
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -118,6 +118,18 @@ config NF_CONNTRACK_EVENTS
|
||||||
|
|
||||||
|
If unsure, say `N'.
|
||||||
|
|
||||||
|
+config NF_CONNTRACK_RTCACHE
|
||||||
|
+ tristate "Cache route entries in conntrack objects"
|
||||||
|
+ depends on NETFILTER_ADVANCED
|
||||||
|
+ depends on NF_CONNTRACK
|
||||||
|
+ help
|
||||||
|
+ If this option is enabled, the connection tracking code will
|
||||||
|
+ cache routing information for each connection that is being
|
||||||
|
+ forwarded, at a cost of 32 bytes per conntrack object.
|
||||||
|
+
|
||||||
|
+ To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
+ The module will be called nf_conntrack_rtcache.
|
||||||
|
+
|
||||||
|
config NF_CONNTRACK_TIMEOUT
|
||||||
|
bool 'Connection tracking timeout'
|
||||||
|
depends on NETFILTER_ADVANCED
|
||||||
|
--- a/net/netfilter/Makefile
|
||||||
|
+++ b/net/netfilter/Makefile
|
||||||
|
@@ -19,6 +19,9 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += n
|
||||||
|
# connection tracking
|
||||||
|
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
|
||||||
|
|
||||||
|
+# optional conntrack route cache extension
|
||||||
|
+obj-$(CONFIG_NF_CONNTRACK_RTCACHE) += nf_conntrack_rtcache.o
|
||||||
|
+
|
||||||
|
obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
|
||||||
|
|
||||||
|
# netlink interface for nf_conntrack
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/netfilter/nf_conntrack_rtcache.c
|
||||||
|
@@ -0,0 +1,428 @@
|
||||||
|
+/* route cache for netfilter.
|
||||||
|
+ *
|
||||||
|
+ * (C) 2014 Red Hat GmbH
|
||||||
|
+ *
|
||||||
|
+ * This program is free software; you can redistribute it and/or modify
|
||||||
|
+ * it under the terms of the GNU General Public License version 2 as
|
||||||
|
+ * published by the Free Software Foundation.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||||
|
+
|
||||||
|
+#include <linux/types.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/skbuff.h>
|
||||||
|
+#include <linux/stddef.h>
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/netdevice.h>
|
||||||
|
+#include <linux/export.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+
|
||||||
|
+#include <net/dst.h>
|
||||||
|
+
|
||||||
|
+#include <net/netfilter/nf_conntrack.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_core.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_extend.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_rtcache.h>
|
||||||
|
+
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+#include <net/ip6_fib.h>
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+static void __nf_conn_rtcache_destroy(struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct dst_entry *dst = rtc->cached_dst[dir].dst;
|
||||||
|
+
|
||||||
|
+ dst_release(dst);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_conn_rtcache_destroy(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
|
||||||
|
+
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_ORIGINAL);
|
||||||
|
+ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_REPLY);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_ct_rtcache_ext_add(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc;
|
||||||
|
+
|
||||||
|
+ rtc = nf_ct_ext_add(ct, NF_CT_EXT_RTCACHE, GFP_ATOMIC);
|
||||||
|
+ if (rtc) {
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif = -1;
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_ORIGINAL].dst = NULL;
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_REPLY].iif = -1;
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_REPLY].dst = NULL;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct nf_conn_rtcache *nf_ct_rtcache_find_usable(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ return nf_ct_rtcache_find(ct);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct dst_entry *
|
||||||
|
+nf_conn_rtcache_dst_get(const struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ return rtc->cached_dst[dir].dst;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static u32 nf_rtcache_get_cookie(int pf, const struct dst_entry *dst)
|
||||||
|
+{
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ if (pf == NFPROTO_IPV6) {
|
||||||
|
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
|
||||||
|
+
|
||||||
|
+ if (rt->rt6i_node)
|
||||||
|
+ return (u32)rt->rt6i_node->fn_sernum;
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_conn_rtcache_dst_set(int pf,
|
||||||
|
+ struct nf_conn_rtcache *rtc,
|
||||||
|
+ struct dst_entry *dst,
|
||||||
|
+ enum ip_conntrack_dir dir, int iif)
|
||||||
|
+{
|
||||||
|
+ if (rtc->cached_dst[dir].iif != iif)
|
||||||
|
+ rtc->cached_dst[dir].iif = iif;
|
||||||
|
+
|
||||||
|
+ if (rtc->cached_dst[dir].dst != dst) {
|
||||||
|
+ struct dst_entry *old;
|
||||||
|
+
|
||||||
|
+ dst_hold(dst);
|
||||||
|
+
|
||||||
|
+ old = xchg(&rtc->cached_dst[dir].dst, dst);
|
||||||
|
+ dst_release(old);
|
||||||
|
+
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ if (pf == NFPROTO_IPV6)
|
||||||
|
+ rtc->cached_dst[dir].cookie =
|
||||||
|
+ nf_rtcache_get_cookie(pf, dst);
|
||||||
|
+#endif
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_conn_rtcache_dst_obsolete(struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct dst_entry *old;
|
||||||
|
+
|
||||||
|
+ pr_debug("Invalidate iif %d for dir %d on cache %p\n",
|
||||||
|
+ rtc->cached_dst[dir].iif, dir, rtc);
|
||||||
|
+
|
||||||
|
+ old = xchg(&rtc->cached_dst[dir].dst, NULL);
|
||||||
|
+ dst_release(old);
|
||||||
|
+ rtc->cached_dst[dir].iif = -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nf_rtcache_in(u_int8_t pf,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc;
|
||||||
|
+ enum ip_conntrack_info ctinfo;
|
||||||
|
+ enum ip_conntrack_dir dir;
|
||||||
|
+ struct dst_entry *dst;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ int iif;
|
||||||
|
+ u32 cookie;
|
||||||
|
+
|
||||||
|
+ if (skb_dst(skb) || skb->sk)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ ct = nf_ct_get(skb, &ctinfo);
|
||||||
|
+ if (!ct)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ rtc = nf_ct_rtcache_find_usable(ct);
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ /* if iif changes, don't use cache and let ip stack
|
||||||
|
+ * do route lookup.
|
||||||
|
+ *
|
||||||
|
+ * If rp_filter is enabled it might toss skb, so
|
||||||
|
+ * we don't want to avoid these checks.
|
||||||
|
+ */
|
||||||
|
+ dir = CTINFO2DIR(ctinfo);
|
||||||
|
+ iif = nf_conn_rtcache_iif_get(rtc, dir);
|
||||||
|
+ if (state->in->ifindex != iif) {
|
||||||
|
+ pr_debug("ct %p, iif %d, cached iif %d, skip cached entry\n",
|
||||||
|
+ ct, iif, state->in->ifindex);
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ }
|
||||||
|
+ dst = nf_conn_rtcache_dst_get(rtc, dir);
|
||||||
|
+ if (dst == NULL)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ cookie = nf_rtcache_get_cookie(pf, dst);
|
||||||
|
+
|
||||||
|
+ dst = dst_check(dst, cookie);
|
||||||
|
+ pr_debug("obtained dst %p for skb %p, cookie %d\n", dst, skb, cookie);
|
||||||
|
+ if (likely(dst))
|
||||||
|
+ skb_dst_set_noref(skb, dst);
|
||||||
|
+ else
|
||||||
|
+ nf_conn_rtcache_dst_obsolete(rtc, dir);
|
||||||
|
+
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nf_rtcache_forward(u_int8_t pf,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc;
|
||||||
|
+ enum ip_conntrack_info ctinfo;
|
||||||
|
+ enum ip_conntrack_dir dir;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ struct dst_entry *dst = skb_dst(skb);
|
||||||
|
+ int iif;
|
||||||
|
+
|
||||||
|
+ ct = nf_ct_get(skb, &ctinfo);
|
||||||
|
+ if (!ct)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (dst && dst_xfrm(dst))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (!nf_ct_is_confirmed(ct)) {
|
||||||
|
+ if (WARN_ON(nf_ct_rtcache_find(ct)))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ nf_ct_rtcache_ext_add(ct);
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rtc = nf_ct_rtcache_find_usable(ct);
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ dir = CTINFO2DIR(ctinfo);
|
||||||
|
+ iif = nf_conn_rtcache_iif_get(rtc, dir);
|
||||||
|
+ pr_debug("ct %p, skb %p, dir %d, iif %d, cached iif %d\n",
|
||||||
|
+ ct, skb, dir, iif, state->in->ifindex);
|
||||||
|
+ if (likely(state->in->ifindex == iif))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ nf_conn_rtcache_dst_set(pf, rtc, skb_dst(skb), dir, state->in->ifindex);
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nf_rtcache_in4(void *priv,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ return nf_rtcache_in(NFPROTO_IPV4, skb, state);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nf_rtcache_forward4(void *priv,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ return nf_rtcache_forward(NFPROTO_IPV4, skb, state);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+static unsigned int nf_rtcache_in6(void *priv,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ return nf_rtcache_in(NFPROTO_IPV6, skb, state);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nf_rtcache_forward6(void *priv,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ return nf_rtcache_forward(NFPROTO_IPV6, skb, state);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+static int nf_rtcache_dst_remove(struct nf_conn *ct, void *data)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
|
||||||
|
+ struct net_device *dev = data;
|
||||||
|
+
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ if (dev->ifindex == rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif ||
|
||||||
|
+ dev->ifindex == rtc->cached_dst[IP_CT_DIR_REPLY].iif) {
|
||||||
|
+ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_ORIGINAL);
|
||||||
|
+ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_REPLY);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_rtcache_netdev_event(struct notifier_block *this,
|
||||||
|
+ unsigned long event, void *ptr)
|
||||||
|
+{
|
||||||
|
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||||
|
+ struct net *net = dev_net(dev);
|
||||||
|
+
|
||||||
|
+ if (event == NETDEV_DOWN)
|
||||||
|
+ nf_ct_iterate_cleanup_net(net, nf_rtcache_dst_remove, dev, 0, 0);
|
||||||
|
+
|
||||||
|
+ return NOTIFY_DONE;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct notifier_block nf_rtcache_notifier = {
|
||||||
|
+ .notifier_call = nf_rtcache_netdev_event,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static struct nf_hook_ops rtcache_ops[] = {
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_in4,
|
||||||
|
+ .pf = NFPROTO_IPV4,
|
||||||
|
+ .hooknum = NF_INET_PRE_ROUTING,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_forward4,
|
||||||
|
+ .pf = NFPROTO_IPV4,
|
||||||
|
+ .hooknum = NF_INET_FORWARD,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_in6,
|
||||||
|
+ .pf = NFPROTO_IPV6,
|
||||||
|
+ .hooknum = NF_INET_PRE_ROUTING,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_forward6,
|
||||||
|
+ .pf = NFPROTO_IPV6,
|
||||||
|
+ .hooknum = NF_INET_FORWARD,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+#endif
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static struct nf_ct_ext_type rtcache_extend __read_mostly = {
|
||||||
|
+ .len = sizeof(struct nf_conn_rtcache),
|
||||||
|
+ .align = __alignof__(struct nf_conn_rtcache),
|
||||||
|
+ .id = NF_CT_EXT_RTCACHE,
|
||||||
|
+ .destroy = nf_conn_rtcache_destroy,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static void __net_exit rtcache_net_exit(struct net *net)
|
||||||
|
+{
|
||||||
|
+ /* remove hooks so no new connections get rtcache extension */
|
||||||
|
+ nf_unregister_net_hooks(net, rtcache_ops, ARRAY_SIZE(rtcache_ops));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct pernet_operations rtcache_ops_net_ops = {
|
||||||
|
+ .exit = rtcache_net_exit,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int __init nf_conntrack_rtcache_init(void)
|
||||||
|
+{
|
||||||
|
+ int ret = nf_ct_extend_register(&rtcache_extend);
|
||||||
|
+
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ pr_err("nf_conntrack_rtcache: Unable to register extension\n");
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = register_pernet_subsys(&rtcache_ops_net_ops);
|
||||||
|
+ if (ret) {
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = nf_register_net_hooks(&init_net, rtcache_ops,
|
||||||
|
+ ARRAY_SIZE(rtcache_ops));
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+ unregister_pernet_subsys(&rtcache_ops_net_ops);
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = register_netdevice_notifier(&nf_rtcache_notifier);
|
||||||
|
+ if (ret) {
|
||||||
|
+ nf_unregister_net_hooks(&init_net, rtcache_ops,
|
||||||
|
+ ARRAY_SIZE(rtcache_ops));
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+ unregister_pernet_subsys(&rtcache_ops_net_ops);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_rtcache_ext_remove(struct nf_conn *ct, void *data)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
|
||||||
|
+
|
||||||
|
+ return rtc != NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool __exit nf_conntrack_rtcache_wait_for_dying(struct net *net)
|
||||||
|
+{
|
||||||
|
+ bool wait = false;
|
||||||
|
+ int cpu;
|
||||||
|
+
|
||||||
|
+ for_each_possible_cpu(cpu) {
|
||||||
|
+ struct nf_conntrack_tuple_hash *h;
|
||||||
|
+ struct hlist_nulls_node *n;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
|
||||||
|
+
|
||||||
|
+ rcu_read_lock();
|
||||||
|
+ spin_lock_bh(&pcpu->lock);
|
||||||
|
+
|
||||||
|
+ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
|
||||||
|
+ ct = nf_ct_tuplehash_to_ctrack(h);
|
||||||
|
+ if (nf_ct_rtcache_find(ct) != NULL) {
|
||||||
|
+ wait = true;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ spin_unlock_bh(&pcpu->lock);
|
||||||
|
+ rcu_read_unlock();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return wait;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __exit nf_conntrack_rtcache_fini(void)
|
||||||
|
+{
|
||||||
|
+ struct net *net;
|
||||||
|
+ int count = 0;
|
||||||
|
+
|
||||||
|
+ synchronize_net();
|
||||||
|
+
|
||||||
|
+ unregister_netdevice_notifier(&nf_rtcache_notifier);
|
||||||
|
+
|
||||||
|
+ rtnl_lock();
|
||||||
|
+
|
||||||
|
+ /* zap all conntracks with rtcache extension */
|
||||||
|
+ for_each_net(net)
|
||||||
|
+ nf_ct_iterate_cleanup_net(net, nf_rtcache_ext_remove, NULL, 0, 0);
|
||||||
|
+
|
||||||
|
+ for_each_net(net) {
|
||||||
|
+ /* .. and make sure they're gone from dying list, too */
|
||||||
|
+ while (nf_conntrack_rtcache_wait_for_dying(net)) {
|
||||||
|
+ msleep(200);
|
||||||
|
+ WARN_ONCE(++count > 25, "Waiting for all rtcache conntracks to go away\n");
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rtnl_unlock();
|
||||||
|
+ synchronize_net();
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+}
|
||||||
|
+module_init(nf_conntrack_rtcache_init);
|
||||||
|
+module_exit(nf_conntrack_rtcache_fini);
|
||||||
|
+
|
||||||
|
+MODULE_LICENSE("GPL");
|
||||||
|
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
|
||||||
|
+MODULE_DESCRIPTION("Conntrack route cache extension");
|
|
@ -0,0 +1,85 @@
|
||||||
|
From: Eric Dumazet <edumazet@google.com>
|
||||||
|
Date: Sat, 11 Nov 2017 15:54:12 -0800
|
||||||
|
Subject: [PATCH] tcp: allow drivers to tweak TSQ logic
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
I had many reports that TSQ logic breaks wifi aggregation.
|
||||||
|
|
||||||
|
Current logic is to allow up to 1 ms of bytes to be queued into qdisc
|
||||||
|
and drivers queues.
|
||||||
|
|
||||||
|
But Wifi aggregation needs a bigger budget to allow bigger rates to
|
||||||
|
be discovered by various TCP Congestion Controls algorithms.
|
||||||
|
|
||||||
|
This patch adds an extra socket field, allowing wifi drivers to select
|
||||||
|
another log scale to derive TCP Small Queue credit from current pacing
|
||||||
|
rate.
|
||||||
|
|
||||||
|
Initial value is 10, meaning that this patch does not change current
|
||||||
|
behavior.
|
||||||
|
|
||||||
|
We expect wifi drivers to set this field to smaller values (tests have
|
||||||
|
been done with values from 6 to 9)
|
||||||
|
|
||||||
|
They would have to use the following template:
|
||||||
|
|
||||||
|
if (skb->sk && skb->sk->sk_pacing_shift != MY_PACING_SHIFT)
|
||||||
|
skb->sk->sk_pacing_shift = MY_PACING_SHIFT;
|
||||||
|
|
||||||
|
Ref: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1670041
|
||||||
|
Signed-off-by: Eric Dumazet <edumazet@google.com>
|
||||||
|
Cc: Johannes Berg <johannes.berg@intel.com>
|
||||||
|
Cc: Toke Høiland-Jørgensen <toke@toke.dk>
|
||||||
|
Cc: Kir Kolyshkin <kir@openvz.org>
|
||||||
|
---
|
||||||
|
--- a/include/net/sock.h
|
||||||
|
+++ b/include/net/sock.h
|
||||||
|
@@ -267,6 +267,7 @@ struct sock_common {
|
||||||
|
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
|
||||||
|
* @sk_gso_max_size: Maximum GSO segment size to build
|
||||||
|
* @sk_gso_max_segs: Maximum number of GSO segments
|
||||||
|
+ * @sk_pacing_shift: scaling factor for TCP Small Queues
|
||||||
|
* @sk_lingertime: %SO_LINGER l_linger setting
|
||||||
|
* @sk_backlog: always used with the per-socket spinlock held
|
||||||
|
* @sk_callback_lock: used with the callbacks in the end of this struct
|
||||||
|
@@ -445,6 +446,8 @@ struct sock {
|
||||||
|
sk_type : 16;
|
||||||
|
#define SK_PROTOCOL_MAX U8_MAX
|
||||||
|
u16 sk_gso_max_segs;
|
||||||
|
+#define sk_pacing_shift sk_pacing_shift /* for backport checks */
|
||||||
|
+ u8 sk_pacing_shift;
|
||||||
|
unsigned long sk_lingertime;
|
||||||
|
struct proto *sk_prot_creator;
|
||||||
|
rwlock_t sk_callback_lock;
|
||||||
|
--- a/net/core/sock.c
|
||||||
|
+++ b/net/core/sock.c
|
||||||
|
@@ -2739,6 +2739,7 @@ void sock_init_data(struct socket *sock,
|
||||||
|
|
||||||
|
sk->sk_max_pacing_rate = ~0U;
|
||||||
|
sk->sk_pacing_rate = ~0U;
|
||||||
|
+ sk->sk_pacing_shift = 10;
|
||||||
|
sk->sk_incoming_cpu = -1;
|
||||||
|
/*
|
||||||
|
* Before updating sk_refcnt, we must commit prior changes to memory
|
||||||
|
--- a/net/ipv4/tcp_output.c
|
||||||
|
+++ b/net/ipv4/tcp_output.c
|
||||||
|
@@ -1683,7 +1683,7 @@ u32 tcp_tso_autosize(const struct sock *
|
||||||
|
{
|
||||||
|
u32 bytes, segs;
|
||||||
|
|
||||||
|
- bytes = min(sk->sk_pacing_rate >> 10,
|
||||||
|
+ bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
|
||||||
|
sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
|
||||||
|
|
||||||
|
/* Goal is to send at least one packet per ms,
|
||||||
|
@@ -2184,7 +2184,7 @@ static bool tcp_small_queue_check(struct
|
||||||
|
{
|
||||||
|
unsigned int limit;
|
||||||
|
|
||||||
|
- limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
|
||||||
|
+ limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
|
||||||
|
limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
|
||||||
|
limit <<= factor;
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
From 4d304a6fe93538ce356b4593dc43476b50c023e7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Giuseppe Lippolis <giu.lippolis@gmail.com>
|
||||||
|
Date: Mon, 23 Apr 2018 09:03:06 +0200
|
||||||
|
Subject: USB: serial: option: blacklist unused dwm-158 interfaces
|
||||||
|
|
||||||
|
The dwm-158 interfaces 4 and 5 don't answer AT commands
|
||||||
|
and don't appear as option interfaces.
|
||||||
|
Tested on openwrt distribution (kernel 4.14 using the old blacklist
|
||||||
|
definitions).
|
||||||
|
|
||||||
|
Lars Melin also writes:
|
||||||
|
|
||||||
|
Blacklisting interface 4 and 5 is correct because:
|
||||||
|
|
||||||
|
MI_00 D-Link Mobile Broadband Device (cdc_ether)
|
||||||
|
MI_02 D-Link HSPA+DataCard Diagnostics Interface (also ppp modem)
|
||||||
|
MI_03 D-Link HSPA+DataCard NMEA Device
|
||||||
|
MI_04 D-Link HSPA+DataCard Speech Port
|
||||||
|
MI_05 D-Link HSPA+DataCard Debug Port
|
||||||
|
MI_06 USB Mass Storage Device
|
||||||
|
|
||||||
|
Signed-off-by: Giuseppe Lippolis <giu.lippolis@gmail.com>
|
||||||
|
[ johan: add Lars's comment on the interface layout and reword summary ]
|
||||||
|
Cc: Lars Melin <larsm17@gmail.com>
|
||||||
|
Cc: Dan Williams <dcbw@redhat.com>
|
||||||
|
Signed-off-by: Johan Hovold <johan@kernel.org>
|
||||||
|
---
|
||||||
|
drivers/usb/serial/option.c | 3 ++-
|
||||||
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
--- a/drivers/usb/serial/option.c
|
||||||
|
+++ b/drivers/usb/serial/option.c
|
||||||
|
@@ -1927,7 +1927,8 @@ static const struct usb_device_id option
|
||||||
|
{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d01, 0xff) }, /* D-Link DWM-156 (variant) */
|
||||||
|
{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d02, 0xff) },
|
||||||
|
{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d03, 0xff) },
|
||||||
|
- { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) }, /* D-Link DWM-158 */
|
||||||
|
+ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff), /* D-Link DWM-158 */
|
||||||
|
+ .driver_info = RSVD(4) | RSVD(5) },
|
||||||
|
{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d0e, 0xff) }, /* D-Link DWM-157 C1 */
|
||||||
|
{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */
|
||||||
|
.driver_info = RSVD(4) },
|
|
@ -0,0 +1,109 @@
|
||||||
|
From 531ef5ebea96394ddb7f554d4d88e017dde30a59 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Amelie Delaunay <amelie.delaunay@st.com>
|
||||||
|
Date: Tue, 13 Feb 2018 09:28:12 +0100
|
||||||
|
Subject: [PATCH] usb: dwc2: add support for host mode external vbus supply
|
||||||
|
|
||||||
|
This patch adds a way to enable an external vbus supply in host mode,
|
||||||
|
when dwc2 drvvbus signal is not used.
|
||||||
|
|
||||||
|
This patch is very similar to the one done in U-Boot dwc2 driver [1]. It
|
||||||
|
also adds dynamic vbus supply management depending on the role and state
|
||||||
|
of the core.
|
||||||
|
|
||||||
|
[1] https://lists.denx.de/pipermail/u-boot/2017-March/283434.html
|
||||||
|
|
||||||
|
Signed-off-by: Amelie Delaunay <amelie.delaunay@st.com>
|
||||||
|
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
|
||||||
|
---
|
||||||
|
drivers/usb/dwc2/core.h | 2 ++
|
||||||
|
drivers/usb/dwc2/hcd.c | 26 ++++++++++++++++++++++++++
|
||||||
|
2 files changed, 28 insertions(+)
|
||||||
|
|
||||||
|
--- a/drivers/usb/dwc2/core.h
|
||||||
|
+++ b/drivers/usb/dwc2/core.h
|
||||||
|
@@ -777,6 +777,7 @@ struct dwc2_hregs_backup {
|
||||||
|
* @plat: The platform specific configuration data. This can be
|
||||||
|
* removed once all SoCs support usb transceiver.
|
||||||
|
* @supplies: Definition of USB power supplies
|
||||||
|
+ * @vbus_supply: Regulator supplying vbus.
|
||||||
|
* @phyif: PHY interface width
|
||||||
|
* @lock: Spinlock that protects all the driver data structures
|
||||||
|
* @priv: Stores a pointer to the struct usb_hcd
|
||||||
|
@@ -914,6 +915,7 @@ struct dwc2_hsotg {
|
||||||
|
struct usb_phy *uphy;
|
||||||
|
struct dwc2_hsotg_plat *plat;
|
||||||
|
struct regulator_bulk_data supplies[DWC2_NUM_SUPPLIES];
|
||||||
|
+ struct regulator *vbus_supply;
|
||||||
|
u32 phyif;
|
||||||
|
|
||||||
|
spinlock_t lock;
|
||||||
|
--- a/drivers/usb/dwc2/hcd.c
|
||||||
|
+++ b/drivers/usb/dwc2/hcd.c
|
||||||
|
@@ -359,6 +359,23 @@ static void dwc2_gusbcfg_init(struct dwc
|
||||||
|
dwc2_writel(usbcfg, hsotg->regs + GUSBCFG);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int dwc2_vbus_supply_init(struct dwc2_hsotg *hsotg)
|
||||||
|
+{
|
||||||
|
+ hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus");
|
||||||
|
+ if (IS_ERR(hsotg->vbus_supply))
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ return regulator_enable(hsotg->vbus_supply);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int dwc2_vbus_supply_exit(struct dwc2_hsotg *hsotg)
|
||||||
|
+{
|
||||||
|
+ if (hsotg->vbus_supply)
|
||||||
|
+ return regulator_disable(hsotg->vbus_supply);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/**
|
||||||
|
* dwc2_enable_host_interrupts() - Enables the Host mode interrupts
|
||||||
|
*
|
||||||
|
@@ -3342,6 +3359,7 @@ static void dwc2_conn_id_status_change(s
|
||||||
|
|
||||||
|
/* B-Device connector (Device Mode) */
|
||||||
|
if (gotgctl & GOTGCTL_CONID_B) {
|
||||||
|
+ dwc2_vbus_supply_exit(hsotg);
|
||||||
|
/* Wait for switch to device mode */
|
||||||
|
dev_dbg(hsotg->dev, "connId B\n");
|
||||||
|
if (hsotg->bus_suspended) {
|
||||||
|
@@ -4448,6 +4466,9 @@ static int _dwc2_hcd_start(struct usb_hc
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock_irqrestore(&hsotg->lock, flags);
|
||||||
|
+
|
||||||
|
+ dwc2_vbus_supply_init(hsotg);
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -4475,6 +4496,8 @@ static void _dwc2_hcd_stop(struct usb_hc
|
||||||
|
clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
|
||||||
|
spin_unlock_irqrestore(&hsotg->lock, flags);
|
||||||
|
|
||||||
|
+ dwc2_vbus_supply_exit(hsotg);
|
||||||
|
+
|
||||||
|
usleep_range(1000, 3000);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -4511,6 +4534,7 @@ static int _dwc2_hcd_suspend(struct usb_
|
||||||
|
hprt0 |= HPRT0_SUSP;
|
||||||
|
hprt0 &= ~HPRT0_PWR;
|
||||||
|
dwc2_writel(hprt0, hsotg->regs + HPRT0);
|
||||||
|
+ dwc2_vbus_supply_exit(hsotg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Enter hibernation */
|
||||||
|
@@ -4591,6 +4615,8 @@ static int _dwc2_hcd_resume(struct usb_h
|
||||||
|
spin_unlock_irqrestore(&hsotg->lock, flags);
|
||||||
|
dwc2_port_resume(hsotg);
|
||||||
|
} else {
|
||||||
|
+ dwc2_vbus_supply_init(hsotg);
|
||||||
|
+
|
||||||
|
/* Wait for controller to correctly update D+/D- level */
|
||||||
|
usleep_range(3000, 5000);
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
From 438fea2a6325933868aebc20279e2669c9a21207 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tomeu Vizoso <tomeu.vizoso@collabora.com>
|
||||||
|
Date: Mon, 26 Mar 2018 11:00:01 +0200
|
||||||
|
Subject: [PATCH] usb: dwc2: dwc2_vbus_supply_init: fix error check
|
||||||
|
|
||||||
|
devm_regulator_get_optional returns -ENODEV if the regulator isn't
|
||||||
|
there, so if that's the case we have to make sure not to leave -ENODEV
|
||||||
|
in the regulator pointer.
|
||||||
|
|
||||||
|
Also, make sure we return 0 in that case, but correctly propagate any
|
||||||
|
other errors. Also propagate the error from _dwc2_hcd_start.
|
||||||
|
|
||||||
|
Fixes: 531ef5ebea96 ("usb: dwc2: add support for host mode external vbus supply")
|
||||||
|
Cc: Amelie Delaunay <amelie.delaunay@st.com>
|
||||||
|
Reviewed-by: Amelie Delaunay <amelie.delaunay@st.com>
|
||||||
|
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
|
||||||
|
Reviewed-by: Grigor Tovmasyan <tovmasya@synopsys.com>
|
||||||
|
Tested-by: Heiko Stuebner <heiko@sntech.de>
|
||||||
|
Acked-by: Minas Harutyunyan <hminas@synopsys.com>
|
||||||
|
Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
|
||||||
|
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
|
||||||
|
---
|
||||||
|
drivers/usb/dwc2/hcd.c | 13 ++++++++-----
|
||||||
|
1 file changed, 8 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/usb/dwc2/hcd.c
|
||||||
|
+++ b/drivers/usb/dwc2/hcd.c
|
||||||
|
@@ -361,9 +361,14 @@ static void dwc2_gusbcfg_init(struct dwc
|
||||||
|
|
||||||
|
static int dwc2_vbus_supply_init(struct dwc2_hsotg *hsotg)
|
||||||
|
{
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus");
|
||||||
|
- if (IS_ERR(hsotg->vbus_supply))
|
||||||
|
- return 0;
|
||||||
|
+ if (IS_ERR(hsotg->vbus_supply)) {
|
||||||
|
+ ret = PTR_ERR(hsotg->vbus_supply);
|
||||||
|
+ hsotg->vbus_supply = NULL;
|
||||||
|
+ return ret == -ENODEV ? 0 : ret;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
return regulator_enable(hsotg->vbus_supply);
|
||||||
|
}
|
||||||
|
@@ -4467,9 +4472,7 @@ static int _dwc2_hcd_start(struct usb_hc
|
||||||
|
|
||||||
|
spin_unlock_irqrestore(&hsotg->lock, flags);
|
||||||
|
|
||||||
|
- dwc2_vbus_supply_init(hsotg);
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
+ return dwc2_vbus_supply_init(hsotg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
|
@ -0,0 +1,74 @@
|
||||||
|
From 2c77c57d22adb05b21cdb333a0c42bdfa0e19835 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Tue, 16 Jan 2018 16:45:41 +0100
|
||||||
|
Subject: [PATCH] mtd: move code adding master MTD out of
|
||||||
|
mtd_add_device_partitions()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
This change is a small cleanup of mtd_device_parse_register(). When
|
||||||
|
using MTD_PARTITIONED_MASTER it makes sure a master MTD is registered
|
||||||
|
before dealing with partitions. The advantage of this is not mixing
|
||||||
|
code handling master MTD with code handling partitions.
|
||||||
|
|
||||||
|
This commit doesn't change any behavior except for a slightly different
|
||||||
|
failure code path. The new code may need to call del_mtd_device when
|
||||||
|
something goes wrong.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/mtdcore.c | 25 +++++++++++++------------
|
||||||
|
1 file changed, 13 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/mtdcore.c
|
||||||
|
+++ b/drivers/mtd/mtdcore.c
|
||||||
|
@@ -641,20 +641,12 @@ static int mtd_add_device_partitions(str
|
||||||
|
{
|
||||||
|
const struct mtd_partition *real_parts = parts->parts;
|
||||||
|
int nbparts = parts->nr_parts;
|
||||||
|
- int ret;
|
||||||
|
|
||||||
|
- if (nbparts == 0 || IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
|
||||||
|
- ret = add_mtd_device(mtd);
|
||||||
|
- if (ret)
|
||||||
|
- return ret;
|
||||||
|
- }
|
||||||
|
+ if (!nbparts && !device_is_registered(&mtd->dev))
|
||||||
|
+ return add_mtd_device(mtd);
|
||||||
|
|
||||||
|
- if (nbparts > 0) {
|
||||||
|
- ret = add_mtd_partitions(mtd, real_parts, nbparts);
|
||||||
|
- if (ret && IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
|
||||||
|
- del_mtd_device(mtd);
|
||||||
|
- return ret;
|
||||||
|
- }
|
||||||
|
+ if (nbparts > 0)
|
||||||
|
+ return add_mtd_partitions(mtd, real_parts, nbparts);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@@ -714,6 +706,12 @@ int mtd_device_parse_register(struct mtd
|
||||||
|
|
||||||
|
mtd_set_dev_defaults(mtd);
|
||||||
|
|
||||||
|
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
|
||||||
|
+ ret = add_mtd_device(mtd);
|
||||||
|
+ if (ret)
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
memset(&parsed, 0, sizeof(parsed));
|
||||||
|
|
||||||
|
ret = parse_mtd_partitions(mtd, types, &parsed, parser_data);
|
||||||
|
@@ -753,6 +751,9 @@ int mtd_device_parse_register(struct mtd
|
||||||
|
out:
|
||||||
|
/* Cleanup any parsed partitions */
|
||||||
|
mtd_part_parser_cleanup(&parsed);
|
||||||
|
+ if (ret && device_is_registered(&mtd->dev))
|
||||||
|
+ del_mtd_device(mtd);
|
||||||
|
+
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(mtd_device_parse_register);
|
|
@ -0,0 +1,93 @@
|
||||||
|
From 0dbe4ea78d69756efeb0bba0764f6bd4a9ee9567 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Tue, 16 Jan 2018 16:45:42 +0100
|
||||||
|
Subject: [PATCH] mtd: get rid of the mtd_add_device_partitions()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
This simplifies code a bit by:
|
||||||
|
1) Avoiding an extra (tiny) function
|
||||||
|
2) Checking for amount of parsed (found) partitions just once
|
||||||
|
3) Avoiding clearing/filling struct mtd_partitions manually
|
||||||
|
|
||||||
|
With this commit proper functions are called directly from the
|
||||||
|
mtd_device_parse_register(). It doesn't need to use minor tricks like
|
||||||
|
memsetting struct to 0 to trigger an expected
|
||||||
|
mtd_add_device_partitions() behavior.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/mtdcore.c | 43 ++++++++++++-------------------------------
|
||||||
|
1 file changed, 12 insertions(+), 31 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/mtdcore.c
|
||||||
|
+++ b/drivers/mtd/mtdcore.c
|
||||||
|
@@ -636,21 +636,6 @@ out_error:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int mtd_add_device_partitions(struct mtd_info *mtd,
|
||||||
|
- struct mtd_partitions *parts)
|
||||||
|
-{
|
||||||
|
- const struct mtd_partition *real_parts = parts->parts;
|
||||||
|
- int nbparts = parts->nr_parts;
|
||||||
|
-
|
||||||
|
- if (!nbparts && !device_is_registered(&mtd->dev))
|
||||||
|
- return add_mtd_device(mtd);
|
||||||
|
-
|
||||||
|
- if (nbparts > 0)
|
||||||
|
- return add_mtd_partitions(mtd, real_parts, nbparts);
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
/*
|
||||||
|
* Set a few defaults based on the parent devices, if not provided by the
|
||||||
|
* driver
|
||||||
|
@@ -701,7 +686,7 @@ int mtd_device_parse_register(struct mtd
|
||||||
|
const struct mtd_partition *parts,
|
||||||
|
int nr_parts)
|
||||||
|
{
|
||||||
|
- struct mtd_partitions parsed;
|
||||||
|
+ struct mtd_partitions parsed = { };
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
mtd_set_dev_defaults(mtd);
|
||||||
|
@@ -712,24 +697,20 @@ int mtd_device_parse_register(struct mtd
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
- memset(&parsed, 0, sizeof(parsed));
|
||||||
|
-
|
||||||
|
+ /* Prefer parsed partitions over driver-provided fallback */
|
||||||
|
ret = parse_mtd_partitions(mtd, types, &parsed, parser_data);
|
||||||
|
- if ((ret < 0 || parsed.nr_parts == 0) && parts && nr_parts) {
|
||||||
|
- /* Fall back to driver-provided partitions */
|
||||||
|
- parsed = (struct mtd_partitions){
|
||||||
|
- .parts = parts,
|
||||||
|
- .nr_parts = nr_parts,
|
||||||
|
- };
|
||||||
|
- } else if (ret < 0) {
|
||||||
|
- /* Didn't come up with parsed OR fallback partitions */
|
||||||
|
- pr_info("mtd: failed to find partitions; one or more parsers reports errors (%d)\n",
|
||||||
|
- ret);
|
||||||
|
- /* Don't abort on errors; we can still use unpartitioned MTD */
|
||||||
|
- memset(&parsed, 0, sizeof(parsed));
|
||||||
|
+ if (!ret && parsed.nr_parts) {
|
||||||
|
+ parts = parsed.parts;
|
||||||
|
+ nr_parts = parsed.nr_parts;
|
||||||
|
}
|
||||||
|
|
||||||
|
- ret = mtd_add_device_partitions(mtd, &parsed);
|
||||||
|
+ if (nr_parts)
|
||||||
|
+ ret = add_mtd_partitions(mtd, parts, nr_parts);
|
||||||
|
+ else if (!device_is_registered(&mtd->dev))
|
||||||
|
+ ret = add_mtd_device(mtd);
|
||||||
|
+ else
|
||||||
|
+ ret = 0;
|
||||||
|
+
|
||||||
|
if (ret)
|
||||||
|
goto out;
|
||||||
|
|
|
@ -0,0 +1,200 @@
|
||||||
|
From 5b644aa012f67fd211138a067b9f351f30bdcc60 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Wed, 14 Mar 2018 13:10:42 +0100
|
||||||
|
Subject: [PATCH] mtd: partitions: add of_match_table parser matching for the
|
||||||
|
"ofpart" type
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
In order to properly support compatibility strings as described in the
|
||||||
|
bindings/mtd/partition.txt "ofpart" type should be treated as an
|
||||||
|
indication for looking into OF. MTD should check "compatible" property
|
||||||
|
and search for a matching parser rather than blindly trying the one
|
||||||
|
supporting "fixed-partitions".
|
||||||
|
|
||||||
|
It also means that existing "fixed-partitions" parser should get renamed
|
||||||
|
to use a more meaningful name.
|
||||||
|
|
||||||
|
This commit achieves that aim by introducing a new mtd_part_of_parse().
|
||||||
|
It works by looking for a matching parser for every string in the
|
||||||
|
"compatible" property (starting with the most specific one).
|
||||||
|
|
||||||
|
Please note that driver-specified parsers still take a precedence. It's
|
||||||
|
assumed that driver providing a parser type has a good reason for that
|
||||||
|
(e.g. having platform data with device-specific info). Also doing
|
||||||
|
otherwise could break existing setups. The same applies to using default
|
||||||
|
parsers (including "cmdlinepart") as some overwrite DT data with cmdline
|
||||||
|
argument.
|
||||||
|
|
||||||
|
Partition parsers can now provide an of_match_table to enable
|
||||||
|
flash<-->parser matching via device tree as documented in the
|
||||||
|
mtd/partition.txt.
|
||||||
|
|
||||||
|
This support is currently limited to built-in parsers as it uses
|
||||||
|
request_module() and friends. This should be sufficient for most cases
|
||||||
|
though as compiling parsers as modules isn't a common choice.
|
||||||
|
|
||||||
|
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Tested-by: Peter Rosin <peda@axentia.se>
|
||||||
|
Reviewed-by: Richard Weinberger <richard@nod.at>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/mtdpart.c | 116 +++++++++++++++++++++++++++++++++++++----
|
||||||
|
include/linux/mtd/partitions.h | 1 +
|
||||||
|
2 files changed, 108 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/mtdpart.c
|
||||||
|
+++ b/drivers/mtd/mtdpart.c
|
||||||
|
@@ -30,6 +30,7 @@
|
||||||
|
#include <linux/mtd/mtd.h>
|
||||||
|
#include <linux/mtd/partitions.h>
|
||||||
|
#include <linux/err.h>
|
||||||
|
+#include <linux/of.h>
|
||||||
|
|
||||||
|
#include "mtdcore.h"
|
||||||
|
|
||||||
|
@@ -894,6 +895,92 @@ static int mtd_part_do_parse(struct mtd_
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
+ * mtd_part_get_compatible_parser - find MTD parser by a compatible string
|
||||||
|
+ *
|
||||||
|
+ * @compat: compatible string describing partitions in a device tree
|
||||||
|
+ *
|
||||||
|
+ * MTD parsers can specify supported partitions by providing a table of
|
||||||
|
+ * compatibility strings. This function finds a parser that advertises support
|
||||||
|
+ * for a passed value of "compatible".
|
||||||
|
+ */
|
||||||
|
+static struct mtd_part_parser *mtd_part_get_compatible_parser(const char *compat)
|
||||||
|
+{
|
||||||
|
+ struct mtd_part_parser *p, *ret = NULL;
|
||||||
|
+
|
||||||
|
+ spin_lock(&part_parser_lock);
|
||||||
|
+
|
||||||
|
+ list_for_each_entry(p, &part_parsers, list) {
|
||||||
|
+ const struct of_device_id *matches;
|
||||||
|
+
|
||||||
|
+ matches = p->of_match_table;
|
||||||
|
+ if (!matches)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ for (; matches->compatible[0]; matches++) {
|
||||||
|
+ if (!strcmp(matches->compatible, compat) &&
|
||||||
|
+ try_module_get(p->owner)) {
|
||||||
|
+ ret = p;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (ret)
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ spin_unlock(&part_parser_lock);
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int mtd_part_of_parse(struct mtd_info *master,
|
||||||
|
+ struct mtd_partitions *pparts)
|
||||||
|
+{
|
||||||
|
+ struct mtd_part_parser *parser;
|
||||||
|
+ struct device_node *np;
|
||||||
|
+ struct property *prop;
|
||||||
|
+ const char *compat;
|
||||||
|
+ const char *fixed = "ofpart";
|
||||||
|
+ int ret, err = 0;
|
||||||
|
+
|
||||||
|
+ np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
|
||||||
|
+ of_property_for_each_string(np, "compatible", prop, compat) {
|
||||||
|
+ parser = mtd_part_get_compatible_parser(compat);
|
||||||
|
+ if (!parser)
|
||||||
|
+ continue;
|
||||||
|
+ ret = mtd_part_do_parse(parser, master, pparts, NULL);
|
||||||
|
+ if (ret > 0) {
|
||||||
|
+ of_node_put(np);
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+ mtd_part_parser_put(parser);
|
||||||
|
+ if (ret < 0 && !err)
|
||||||
|
+ err = ret;
|
||||||
|
+ }
|
||||||
|
+ of_node_put(np);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * For backward compatibility we have to try the "ofpart"
|
||||||
|
+ * parser. It supports old DT format with partitions specified as a
|
||||||
|
+ * direct subnodes of a flash device DT node without any compatibility
|
||||||
|
+ * specified we could match.
|
||||||
|
+ */
|
||||||
|
+ parser = mtd_part_parser_get(fixed);
|
||||||
|
+ if (!parser && !request_module("%s", fixed))
|
||||||
|
+ parser = mtd_part_parser_get(fixed);
|
||||||
|
+ if (parser) {
|
||||||
|
+ ret = mtd_part_do_parse(parser, master, pparts, NULL);
|
||||||
|
+ if (ret > 0)
|
||||||
|
+ return ret;
|
||||||
|
+ mtd_part_parser_put(parser);
|
||||||
|
+ if (ret < 0 && !err)
|
||||||
|
+ err = ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
* parse_mtd_partitions - parse MTD partitions
|
||||||
|
* @master: the master partition (describes whole MTD device)
|
||||||
|
* @types: names of partition parsers to try or %NULL
|
||||||
|
@@ -925,19 +1012,30 @@ int parse_mtd_partitions(struct mtd_info
|
||||||
|
types = default_mtd_part_types;
|
||||||
|
|
||||||
|
for ( ; *types; types++) {
|
||||||
|
- pr_debug("%s: parsing partitions %s\n", master->name, *types);
|
||||||
|
- parser = mtd_part_parser_get(*types);
|
||||||
|
- if (!parser && !request_module("%s", *types))
|
||||||
|
+ /*
|
||||||
|
+ * ofpart is a special type that means OF partitioning info
|
||||||
|
+ * should be used. It requires a bit different logic so it is
|
||||||
|
+ * handled in a separated function.
|
||||||
|
+ */
|
||||||
|
+ if (!strcmp(*types, "ofpart")) {
|
||||||
|
+ ret = mtd_part_of_parse(master, pparts);
|
||||||
|
+ } else {
|
||||||
|
+ pr_debug("%s: parsing partitions %s\n", master->name,
|
||||||
|
+ *types);
|
||||||
|
parser = mtd_part_parser_get(*types);
|
||||||
|
- pr_debug("%s: got parser %s\n", master->name,
|
||||||
|
- parser ? parser->name : NULL);
|
||||||
|
- if (!parser)
|
||||||
|
- continue;
|
||||||
|
- ret = mtd_part_do_parse(parser, master, pparts, data);
|
||||||
|
+ if (!parser && !request_module("%s", *types))
|
||||||
|
+ parser = mtd_part_parser_get(*types);
|
||||||
|
+ pr_debug("%s: got parser %s\n", master->name,
|
||||||
|
+ parser ? parser->name : NULL);
|
||||||
|
+ if (!parser)
|
||||||
|
+ continue;
|
||||||
|
+ ret = mtd_part_do_parse(parser, master, pparts, data);
|
||||||
|
+ if (ret <= 0)
|
||||||
|
+ mtd_part_parser_put(parser);
|
||||||
|
+ }
|
||||||
|
/* Found partitions! */
|
||||||
|
if (ret > 0)
|
||||||
|
return 0;
|
||||||
|
- mtd_part_parser_put(parser);
|
||||||
|
/*
|
||||||
|
* Stash the first error we see; only report it if no parser
|
||||||
|
* succeeds
|
||||||
|
--- a/include/linux/mtd/partitions.h
|
||||||
|
+++ b/include/linux/mtd/partitions.h
|
||||||
|
@@ -77,6 +77,7 @@ struct mtd_part_parser {
|
||||||
|
struct list_head list;
|
||||||
|
struct module *owner;
|
||||||
|
const char *name;
|
||||||
|
+ const struct of_device_id *of_match_table;
|
||||||
|
int (*parse_fn)(struct mtd_info *, const struct mtd_partition **,
|
||||||
|
struct mtd_part_parser_data *);
|
||||||
|
void (*cleanup)(const struct mtd_partition *pparts, int nr_parts);
|
|
@ -0,0 +1,74 @@
|
||||||
|
From c0faf43482e7f7dfb6d61847cb93d17748560b24 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Wed, 14 Mar 2018 13:10:43 +0100
|
||||||
|
Subject: [PATCH] mtd: rename "ofpart" parser to "fixed-partitions" as it fits
|
||||||
|
it better
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Type "ofpart" means that OF should be used to get partitioning info and
|
||||||
|
this driver supports "fixed-partitions" binding only. Renaming it should
|
||||||
|
lead to less confusion especially when parsers for new compatibility
|
||||||
|
strings start to appear.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Reviewed-by: Richard Weinberger <richard@nod.at>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/mtdpart.c | 4 ++--
|
||||||
|
drivers/mtd/ofpart.c | 11 ++++++-----
|
||||||
|
2 files changed, 8 insertions(+), 7 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/mtdpart.c
|
||||||
|
+++ b/drivers/mtd/mtdpart.c
|
||||||
|
@@ -940,7 +940,7 @@ static int mtd_part_of_parse(struct mtd_
|
||||||
|
struct device_node *np;
|
||||||
|
struct property *prop;
|
||||||
|
const char *compat;
|
||||||
|
- const char *fixed = "ofpart";
|
||||||
|
+ const char *fixed = "fixed-partitions";
|
||||||
|
int ret, err = 0;
|
||||||
|
|
||||||
|
np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
|
||||||
|
@@ -960,7 +960,7 @@ static int mtd_part_of_parse(struct mtd_
|
||||||
|
of_node_put(np);
|
||||||
|
|
||||||
|
/*
|
||||||
|
- * For backward compatibility we have to try the "ofpart"
|
||||||
|
+ * For backward compatibility we have to try the "fixed-partitions"
|
||||||
|
* parser. It supports old DT format with partitions specified as a
|
||||||
|
* direct subnodes of a flash device DT node without any compatibility
|
||||||
|
* specified we could match.
|
||||||
|
--- a/drivers/mtd/ofpart.c
|
||||||
|
+++ b/drivers/mtd/ofpart.c
|
||||||
|
@@ -25,9 +25,9 @@ static bool node_has_compatible(struct d
|
||||||
|
return of_get_property(pp, "compatible", NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int parse_ofpart_partitions(struct mtd_info *master,
|
||||||
|
- const struct mtd_partition **pparts,
|
||||||
|
- struct mtd_part_parser_data *data)
|
||||||
|
+static int parse_fixed_partitions(struct mtd_info *master,
|
||||||
|
+ const struct mtd_partition **pparts,
|
||||||
|
+ struct mtd_part_parser_data *data)
|
||||||
|
{
|
||||||
|
struct mtd_partition *parts;
|
||||||
|
struct device_node *mtd_node;
|
||||||
|
@@ -141,8 +141,8 @@ ofpart_none:
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct mtd_part_parser ofpart_parser = {
|
||||||
|
- .parse_fn = parse_ofpart_partitions,
|
||||||
|
- .name = "ofpart",
|
||||||
|
+ .parse_fn = parse_fixed_partitions,
|
||||||
|
+ .name = "fixed-partitions",
|
||||||
|
};
|
||||||
|
|
||||||
|
static int parse_ofoldpart_partitions(struct mtd_info *master,
|
||||||
|
@@ -229,4 +229,5 @@ MODULE_AUTHOR("Vitaly Wool, David Gibson
|
||||||
|
* with the same name. Since we provide the ofoldpart parser, we should have
|
||||||
|
* the corresponding alias.
|
||||||
|
*/
|
||||||
|
+MODULE_ALIAS("fixed-partitions");
|
||||||
|
MODULE_ALIAS("ofoldpart");
|
|
@ -0,0 +1,44 @@
|
||||||
|
From 97b0c7c0df3efd7048ed39d7e2dee34cafd55887 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Wed, 14 Mar 2018 13:10:44 +0100
|
||||||
|
Subject: [PATCH] mtd: ofpart: add of_match_table with "fixed-partitions"
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
This allows using this parser with any flash driver that takes care of
|
||||||
|
setting of_node (using mtd_set_of_node helper) correctly. Up to now
|
||||||
|
support for "fixed-partitions" DT compatibility string was working only
|
||||||
|
with flash drivers that were specifying "ofpart" (manually or by letting
|
||||||
|
mtd use the default set of parsers).
|
||||||
|
|
||||||
|
This matches existing bindings documentation.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Reviewed-by: Brian Norris <computersforpeace@gmail.com>
|
||||||
|
Tested-by: Brian Norris <computersforpeace@gmail.com>
|
||||||
|
Reviewed-by: Richard Weinberger <richard@nod.at>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/ofpart.c | 7 +++++++
|
||||||
|
1 file changed, 7 insertions(+)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/ofpart.c
|
||||||
|
+++ b/drivers/mtd/ofpart.c
|
||||||
|
@@ -140,9 +140,16 @@ ofpart_none:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static const struct of_device_id parse_ofpart_match_table[] = {
|
||||||
|
+ { .compatible = "fixed-partitions" },
|
||||||
|
+ {},
|
||||||
|
+};
|
||||||
|
+MODULE_DEVICE_TABLE(of, parse_ofpart_match_table);
|
||||||
|
+
|
||||||
|
static struct mtd_part_parser ofpart_parser = {
|
||||||
|
.parse_fn = parse_fixed_partitions,
|
||||||
|
.name = "fixed-partitions",
|
||||||
|
+ .of_match_table = parse_ofpart_match_table,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int parse_ofoldpart_partitions(struct mtd_info *master,
|
|
@ -0,0 +1,168 @@
|
||||||
|
From 5ac67ce36cfe38b4c104a42ce52c5c8d526f1c95 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Tue, 27 Mar 2018 22:35:41 +0200
|
||||||
|
Subject: [PATCH] mtd: move code adding (registering) partitions to the
|
||||||
|
parse_mtd_partitions()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
This commit slightly simplifies the code. Every parse_mtd_partitions()
|
||||||
|
caller (out of two existing ones) had to add partitions & cleanup parser
|
||||||
|
on its own. This moves that responsibility into the function.
|
||||||
|
|
||||||
|
That change also allows dropping struct mtd_partitions argument.
|
||||||
|
|
||||||
|
There is one minor behavior change caused by this cleanup. If
|
||||||
|
parse_mtd_partitions() fails to add partitions (add_mtd_partitions()
|
||||||
|
returns an error) then mtd_device_parse_register() will still try to
|
||||||
|
add (register) fallback partitions. It's a real corner case affecting
|
||||||
|
one of uncommon error paths and shouldn't cause any harm.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/mtdcore.c | 14 ++++----------
|
||||||
|
drivers/mtd/mtdcore.h | 1 -
|
||||||
|
drivers/mtd/mtdpart.c | 44 ++++++++++++++++----------------------------
|
||||||
|
3 files changed, 20 insertions(+), 39 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/mtdcore.c
|
||||||
|
+++ b/drivers/mtd/mtdcore.c
|
||||||
|
@@ -686,7 +686,6 @@ int mtd_device_parse_register(struct mtd
|
||||||
|
const struct mtd_partition *parts,
|
||||||
|
int nr_parts)
|
||||||
|
{
|
||||||
|
- struct mtd_partitions parsed = { };
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
mtd_set_dev_defaults(mtd);
|
||||||
|
@@ -698,13 +697,10 @@ int mtd_device_parse_register(struct mtd
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Prefer parsed partitions over driver-provided fallback */
|
||||||
|
- ret = parse_mtd_partitions(mtd, types, &parsed, parser_data);
|
||||||
|
- if (!ret && parsed.nr_parts) {
|
||||||
|
- parts = parsed.parts;
|
||||||
|
- nr_parts = parsed.nr_parts;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if (nr_parts)
|
||||||
|
+ ret = parse_mtd_partitions(mtd, types, parser_data);
|
||||||
|
+ if (ret > 0)
|
||||||
|
+ ret = 0;
|
||||||
|
+ else if (nr_parts)
|
||||||
|
ret = add_mtd_partitions(mtd, parts, nr_parts);
|
||||||
|
else if (!device_is_registered(&mtd->dev))
|
||||||
|
ret = add_mtd_device(mtd);
|
||||||
|
@@ -730,8 +726,6 @@ int mtd_device_parse_register(struct mtd
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
- /* Cleanup any parsed partitions */
|
||||||
|
- mtd_part_parser_cleanup(&parsed);
|
||||||
|
if (ret && device_is_registered(&mtd->dev))
|
||||||
|
del_mtd_device(mtd);
|
||||||
|
|
||||||
|
--- a/drivers/mtd/mtdcore.h
|
||||||
|
+++ b/drivers/mtd/mtdcore.h
|
||||||
|
@@ -15,7 +15,6 @@ int del_mtd_partitions(struct mtd_info *
|
||||||
|
struct mtd_partitions;
|
||||||
|
|
||||||
|
int parse_mtd_partitions(struct mtd_info *master, const char * const *types,
|
||||||
|
- struct mtd_partitions *pparts,
|
||||||
|
struct mtd_part_parser_data *data);
|
||||||
|
|
||||||
|
void mtd_part_parser_cleanup(struct mtd_partitions *parts);
|
||||||
|
--- a/drivers/mtd/mtdpart.c
|
||||||
|
+++ b/drivers/mtd/mtdpart.c
|
||||||
|
@@ -383,20 +383,7 @@ static inline void free_partition(struct
|
||||||
|
*/
|
||||||
|
static int mtd_parse_part(struct mtd_part *slave, const char *const *types)
|
||||||
|
{
|
||||||
|
- struct mtd_partitions parsed;
|
||||||
|
- int err;
|
||||||
|
-
|
||||||
|
- err = parse_mtd_partitions(&slave->mtd, types, &parsed, NULL);
|
||||||
|
- if (err)
|
||||||
|
- return err;
|
||||||
|
- else if (!parsed.nr_parts)
|
||||||
|
- return -ENOENT;
|
||||||
|
-
|
||||||
|
- err = add_mtd_partitions(&slave->mtd, parsed.parts, parsed.nr_parts);
|
||||||
|
-
|
||||||
|
- mtd_part_parser_cleanup(&parsed);
|
||||||
|
-
|
||||||
|
- return err;
|
||||||
|
+ return parse_mtd_partitions(&slave->mtd, types, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct mtd_part *allocate_partition(struct mtd_info *parent,
|
||||||
|
@@ -981,30 +968,27 @@ static int mtd_part_of_parse(struct mtd_
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
- * parse_mtd_partitions - parse MTD partitions
|
||||||
|
+ * parse_mtd_partitions - parse and register MTD partitions
|
||||||
|
+ *
|
||||||
|
* @master: the master partition (describes whole MTD device)
|
||||||
|
* @types: names of partition parsers to try or %NULL
|
||||||
|
- * @pparts: info about partitions found is returned here
|
||||||
|
* @data: MTD partition parser-specific data
|
||||||
|
*
|
||||||
|
- * This function tries to find partition on MTD device @master. It uses MTD
|
||||||
|
- * partition parsers, specified in @types. However, if @types is %NULL, then
|
||||||
|
- * the default list of parsers is used. The default list contains only the
|
||||||
|
+ * This function tries to find & register partitions on MTD device @master. It
|
||||||
|
+ * uses MTD partition parsers, specified in @types. However, if @types is %NULL,
|
||||||
|
+ * then the default list of parsers is used. The default list contains only the
|
||||||
|
* "cmdlinepart" and "ofpart" parsers ATM.
|
||||||
|
* Note: If there are more then one parser in @types, the kernel only takes the
|
||||||
|
* partitions parsed out by the first parser.
|
||||||
|
*
|
||||||
|
* This function may return:
|
||||||
|
* o a negative error code in case of failure
|
||||||
|
- * o zero otherwise, and @pparts will describe the partitions, number of
|
||||||
|
- * partitions, and the parser which parsed them. Caller must release
|
||||||
|
- * resources with mtd_part_parser_cleanup() when finished with the returned
|
||||||
|
- * data.
|
||||||
|
+ * o number of found partitions otherwise
|
||||||
|
*/
|
||||||
|
int parse_mtd_partitions(struct mtd_info *master, const char *const *types,
|
||||||
|
- struct mtd_partitions *pparts,
|
||||||
|
struct mtd_part_parser_data *data)
|
||||||
|
{
|
||||||
|
+ struct mtd_partitions pparts = { };
|
||||||
|
struct mtd_part_parser *parser;
|
||||||
|
int ret, err = 0;
|
||||||
|
|
||||||
|
@@ -1018,7 +1002,7 @@ int parse_mtd_partitions(struct mtd_info
|
||||||
|
* handled in a separated function.
|
||||||
|
*/
|
||||||
|
if (!strcmp(*types, "ofpart")) {
|
||||||
|
- ret = mtd_part_of_parse(master, pparts);
|
||||||
|
+ ret = mtd_part_of_parse(master, &pparts);
|
||||||
|
} else {
|
||||||
|
pr_debug("%s: parsing partitions %s\n", master->name,
|
||||||
|
*types);
|
||||||
|
@@ -1029,13 +1013,17 @@ int parse_mtd_partitions(struct mtd_info
|
||||||
|
parser ? parser->name : NULL);
|
||||||
|
if (!parser)
|
||||||
|
continue;
|
||||||
|
- ret = mtd_part_do_parse(parser, master, pparts, data);
|
||||||
|
+ ret = mtd_part_do_parse(parser, master, &pparts, data);
|
||||||
|
if (ret <= 0)
|
||||||
|
mtd_part_parser_put(parser);
|
||||||
|
}
|
||||||
|
/* Found partitions! */
|
||||||
|
- if (ret > 0)
|
||||||
|
- return 0;
|
||||||
|
+ if (ret > 0) {
|
||||||
|
+ err = add_mtd_partitions(master, pparts.parts,
|
||||||
|
+ pparts.nr_parts);
|
||||||
|
+ mtd_part_parser_cleanup(&pparts);
|
||||||
|
+ return err ? err : pparts.nr_parts;
|
||||||
|
+ }
|
||||||
|
/*
|
||||||
|
* Stash the first error we see; only report it if no parser
|
||||||
|
* succeeds
|
|
@ -0,0 +1,70 @@
|
||||||
|
From 237ea0d4762cc14d0fc80e80d61f0f08e1050c7f Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Thu, 12 Apr 2018 07:24:52 +0200
|
||||||
|
Subject: [PATCH] mtd: bcm47xxpart: improve handling TRX partition size
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
When bcm47xxpart finds a TRX partition (container) it's supposed to jump
|
||||||
|
to the end of it and keep looking for more partitions. TRX and its
|
||||||
|
subpartitions are handled by a separate parser.
|
||||||
|
|
||||||
|
The problem with old code was relying on the length specified in a TRX
|
||||||
|
header. That isn't reliable as TRX is commonly modified to have checksum
|
||||||
|
cover only non-changing subpartitions. Otherwise modifying e.g. a rootfs
|
||||||
|
would result in CRC32 mismatch and bootloader refusing to boot a
|
||||||
|
firmware.
|
||||||
|
|
||||||
|
Fix it by trying better to figure out a real TRX size. We can safely
|
||||||
|
assume that TRX has to cover all subpartitions and the last one is at
|
||||||
|
least of a block size in size. Then compare it with a length field.
|
||||||
|
|
||||||
|
This makes code more optimal & reliable thanks to skipping data that
|
||||||
|
shouldn't be parsed.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/bcm47xxpart.c | 22 ++++++++++++++++++----
|
||||||
|
1 file changed, 18 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/bcm47xxpart.c
|
||||||
|
+++ b/drivers/mtd/bcm47xxpart.c
|
||||||
|
@@ -186,6 +186,8 @@ static int bcm47xxpart_parse(struct mtd_
|
||||||
|
/* TRX */
|
||||||
|
if (buf[0x000 / 4] == TRX_MAGIC) {
|
||||||
|
struct trx_header *trx;
|
||||||
|
+ uint32_t last_subpart;
|
||||||
|
+ uint32_t trx_size;
|
||||||
|
|
||||||
|
if (trx_num >= ARRAY_SIZE(trx_parts))
|
||||||
|
pr_warn("No enough space to store another TRX found at 0x%X\n",
|
||||||
|
@@ -195,11 +197,23 @@ static int bcm47xxpart_parse(struct mtd_
|
||||||
|
bcm47xxpart_add_part(&parts[curr_part++], "firmware",
|
||||||
|
offset, 0);
|
||||||
|
|
||||||
|
- /* Jump to the end of TRX */
|
||||||
|
+ /*
|
||||||
|
+ * Try to find TRX size. The "length" field isn't fully
|
||||||
|
+ * reliable as it could be decreased to make CRC32 cover
|
||||||
|
+ * only part of TRX data. It's commonly used as checksum
|
||||||
|
+ * can't cover e.g. ever-changing rootfs partition.
|
||||||
|
+ * Use offsets as helpers for assuming min TRX size.
|
||||||
|
+ */
|
||||||
|
trx = (struct trx_header *)buf;
|
||||||
|
- offset = roundup(offset + trx->length, blocksize);
|
||||||
|
- /* Next loop iteration will increase the offset */
|
||||||
|
- offset -= blocksize;
|
||||||
|
+ last_subpart = max3(trx->offset[0], trx->offset[1],
|
||||||
|
+ trx->offset[2]);
|
||||||
|
+ trx_size = max(trx->length, last_subpart + blocksize);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Skip the TRX data. Decrease offset by block size as
|
||||||
|
+ * the next loop iteration will increase it.
|
||||||
|
+ */
|
||||||
|
+ offset += roundup(trx_size, blocksize) - blocksize;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
From cf589ce71e84d3b8811c65740645af254c5248c0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Wed, 9 May 2018 10:17:29 +0200
|
||||||
|
Subject: [PATCH] mtd: bcm47xxpart: add of_match_table with a new DT binding
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
This allows using bcm47xxpart parser to find partitions on flash
|
||||||
|
described in DT using the "brcm,bcm947xx-cfe-partitions" compatible
|
||||||
|
property. It means this parser doesn't have to be explicitly selected by
|
||||||
|
a flash driver anymore. It can be used e.g. together with a generic
|
||||||
|
m25p80 / spi-nor if device is just properly described.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/bcm47xxpart.c | 7 +++++++
|
||||||
|
1 file changed, 7 insertions(+)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/bcm47xxpart.c
|
||||||
|
+++ b/drivers/mtd/bcm47xxpart.c
|
||||||
|
@@ -304,9 +304,16 @@ static int bcm47xxpart_parse(struct mtd_
|
||||||
|
return curr_part;
|
||||||
|
};
|
||||||
|
|
||||||
|
+static const struct of_device_id bcm47xxpart_of_match_table[] = {
|
||||||
|
+ { .compatible = "brcm,bcm947xx-cfe-partitions" },
|
||||||
|
+ {},
|
||||||
|
+};
|
||||||
|
+MODULE_DEVICE_TABLE(of, bcm47xxpart_of_match_table);
|
||||||
|
+
|
||||||
|
static struct mtd_part_parser bcm47xxpart_mtd_parser = {
|
||||||
|
.parse_fn = bcm47xxpart_parse,
|
||||||
|
.name = "bcm47xxpart",
|
||||||
|
+ .of_match_table = bcm47xxpart_of_match_table,
|
||||||
|
};
|
||||||
|
module_mtd_part_parser(bcm47xxpart_mtd_parser);
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
From 98534a58c8a40cdc9e3bcb04d74719fbcedfeb52 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Tue, 26 Jun 2018 00:05:08 +0200
|
||||||
|
Subject: [PATCH] mtd: parsers: trx: add of_match_table with the new DT binding
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
This allows using TRX parser to find TRX partitions on flash device
|
||||||
|
described in DT using a proper binding. It's useful for devices storing
|
||||||
|
firmware on a separated flash and having rootfs partition in it.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/parsers/parser_trx.c | 7 +++++++
|
||||||
|
1 file changed, 7 insertions(+)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/parsers/parser_trx.c
|
||||||
|
+++ b/drivers/mtd/parsers/parser_trx.c
|
||||||
|
@@ -116,9 +116,16 @@ static int parser_trx_parse(struct mtd_i
|
||||||
|
return i;
|
||||||
|
};
|
||||||
|
|
||||||
|
+static const struct of_device_id mtd_parser_trx_of_match_table[] = {
|
||||||
|
+ { .compatible = "brcm,trx" },
|
||||||
|
+ {},
|
||||||
|
+};
|
||||||
|
+MODULE_DEVICE_TABLE(of, mtd_parser_trx_of_match_table);
|
||||||
|
+
|
||||||
|
static struct mtd_part_parser mtd_parser_trx = {
|
||||||
|
.parse_fn = parser_trx_parse,
|
||||||
|
.name = "trx",
|
||||||
|
+ .of_match_table = mtd_parser_trx_of_match_table,
|
||||||
|
};
|
||||||
|
module_mtd_part_parser(mtd_parser_trx);
|
||||||
|
|
|
@ -0,0 +1,102 @@
|
||||||
|
From 76a832254ab05502c9394cc51ded6f0abe0e0bee Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Fri, 13 Jul 2018 16:32:21 +0200
|
||||||
|
Subject: [PATCH] mtd: partitions: use DT info for parsing partitions with
|
||||||
|
"compatible" prop
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
So far only flash devices could be described in DT regarding partitions
|
||||||
|
parsing. That could be done with "partitions" subnode and a proper
|
||||||
|
"compatible" string.
|
||||||
|
|
||||||
|
Some devices may use hierarchical (multi-level) layouts and may mix used
|
||||||
|
layouts (fixed and dynamic). Describing that in DT is done by specifying
|
||||||
|
"compatible" for DT-represented partition plus optionally more
|
||||||
|
properties and/or subnodes.
|
||||||
|
|
||||||
|
To support such layouts each DT partition has to be checked for
|
||||||
|
additional description.
|
||||||
|
|
||||||
|
Please note this implementation will work in parallel with support for
|
||||||
|
partition type specified for non-DT setups. That already works since
|
||||||
|
commit 1a0915be1926 ("mtd: partitions: add support for partition
|
||||||
|
parsers").
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/mtdpart.c | 33 +++++++++++++--------------------
|
||||||
|
1 file changed, 13 insertions(+), 20 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/mtdpart.c
|
||||||
|
+++ b/drivers/mtd/mtdpart.c
|
||||||
|
@@ -370,22 +370,6 @@ static inline void free_partition(struct
|
||||||
|
kfree(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/**
|
||||||
|
- * mtd_parse_part - parse MTD partition looking for subpartitions
|
||||||
|
- *
|
||||||
|
- * @slave: part that is supposed to be a container and should be parsed
|
||||||
|
- * @types: NULL-terminated array with names of partition parsers to try
|
||||||
|
- *
|
||||||
|
- * Some partitions are kind of containers with extra subpartitions (volumes).
|
||||||
|
- * There can be various formats of such containers. This function tries to use
|
||||||
|
- * specified parsers to analyze given partition and registers found
|
||||||
|
- * subpartitions on success.
|
||||||
|
- */
|
||||||
|
-static int mtd_parse_part(struct mtd_part *slave, const char *const *types)
|
||||||
|
-{
|
||||||
|
- return parse_mtd_partitions(&slave->mtd, types, NULL);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static struct mtd_part *allocate_partition(struct mtd_info *parent,
|
||||||
|
const struct mtd_partition *part, int partno,
|
||||||
|
uint64_t cur_offset)
|
||||||
|
@@ -783,8 +767,8 @@ int add_mtd_partitions(struct mtd_info *
|
||||||
|
|
||||||
|
add_mtd_device(&slave->mtd);
|
||||||
|
mtd_add_partition_attrs(slave);
|
||||||
|
- if (parts[i].types)
|
||||||
|
- mtd_parse_part(slave, parts[i].types);
|
||||||
|
+ /* Look for subpartitions */
|
||||||
|
+ parse_mtd_partitions(&slave->mtd, parts[i].types, NULL);
|
||||||
|
|
||||||
|
cur_offset = slave->offset + slave->mtd.size;
|
||||||
|
}
|
||||||
|
@@ -860,6 +844,12 @@ static const char * const default_mtd_pa
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
+/* Check DT only when looking for subpartitions. */
|
||||||
|
+static const char * const default_subpartition_types[] = {
|
||||||
|
+ "ofpart",
|
||||||
|
+ NULL
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
static int mtd_part_do_parse(struct mtd_part_parser *parser,
|
||||||
|
struct mtd_info *master,
|
||||||
|
struct mtd_partitions *pparts,
|
||||||
|
@@ -930,7 +920,9 @@ static int mtd_part_of_parse(struct mtd_
|
||||||
|
const char *fixed = "fixed-partitions";
|
||||||
|
int ret, err = 0;
|
||||||
|
|
||||||
|
- np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
|
||||||
|
+ np = mtd_get_of_node(master);
|
||||||
|
+ if (!mtd_is_partition(master))
|
||||||
|
+ np = of_get_child_by_name(np, "partitions");
|
||||||
|
of_property_for_each_string(np, "compatible", prop, compat) {
|
||||||
|
parser = mtd_part_get_compatible_parser(compat);
|
||||||
|
if (!parser)
|
||||||
|
@@ -993,7 +985,8 @@ int parse_mtd_partitions(struct mtd_info
|
||||||
|
int ret, err = 0;
|
||||||
|
|
||||||
|
if (!types)
|
||||||
|
- types = default_mtd_part_types;
|
||||||
|
+ types = mtd_is_partition(master) ? default_subpartition_types :
|
||||||
|
+ default_mtd_part_types;
|
||||||
|
|
||||||
|
for ( ; *types; types++) {
|
||||||
|
/*
|
|
@ -0,0 +1,58 @@
|
||||||
|
From 1186af457cc186c5ed01708da71b1ffbdf0a2638 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Tue, 20 Nov 2018 09:55:45 +0100
|
||||||
|
Subject: [PATCH] mtd: keep original flags for every struct mtd_info
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
When allocating a new partition mtd subsystem runs internal tests in the
|
||||||
|
allocate_partition(). They may result in modifying specified flags (e.g.
|
||||||
|
dropping some /features/ like write access).
|
||||||
|
|
||||||
|
Those constraints don't have to be necessarily true for subpartitions. It
|
||||||
|
may happen parent partition isn't block aligned (effectively disabling
|
||||||
|
write access) while subpartition may fit blocks nicely. In such case all
|
||||||
|
checks should be run again (starting with original flags value).
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/mtdcore.c | 2 ++
|
||||||
|
drivers/mtd/mtdpart.c | 3 ++-
|
||||||
|
include/linux/mtd/mtd.h | 1 +
|
||||||
|
3 files changed, 5 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/mtdcore.c
|
||||||
|
+++ b/drivers/mtd/mtdcore.c
|
||||||
|
@@ -650,6 +650,8 @@ static void mtd_set_dev_defaults(struct
|
||||||
|
} else {
|
||||||
|
pr_debug("mtd device won't show a device symlink in sysfs\n");
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ mtd->orig_flags = mtd->flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
--- a/drivers/mtd/mtdpart.c
|
||||||
|
+++ b/drivers/mtd/mtdpart.c
|
||||||
|
@@ -394,7 +394,8 @@ static struct mtd_part *allocate_partiti
|
||||||
|
|
||||||
|
/* set up the MTD object for this partition */
|
||||||
|
slave->mtd.type = parent->type;
|
||||||
|
- slave->mtd.flags = parent->flags & ~part->mask_flags;
|
||||||
|
+ slave->mtd.flags = parent->orig_flags & ~part->mask_flags;
|
||||||
|
+ slave->mtd.orig_flags = slave->mtd.flags;
|
||||||
|
slave->mtd.size = part->size;
|
||||||
|
slave->mtd.writesize = parent->writesize;
|
||||||
|
slave->mtd.writebufsize = parent->writebufsize;
|
||||||
|
--- a/include/linux/mtd/mtd.h
|
||||||
|
+++ b/include/linux/mtd/mtd.h
|
||||||
|
@@ -218,6 +218,7 @@ struct mtd_debug_info {
|
||||||
|
struct mtd_info {
|
||||||
|
u_char type;
|
||||||
|
uint32_t flags;
|
||||||
|
+ uint32_t orig_flags; /* Flags as before running mtd checks */
|
||||||
|
uint64_t size; // Total size of the MTD
|
||||||
|
|
||||||
|
/* "Major" erase size for the device. Naïve users may take this
|
|
@ -0,0 +1,55 @@
|
||||||
|
From 6750f61a13a0197c40e4a40739117493b15f19e8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Tue, 20 Nov 2018 10:24:09 +0100
|
||||||
|
Subject: [PATCH] mtd: improve calculating partition boundaries when checking
|
||||||
|
for alignment
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
When checking for alignment mtd should check absolute offsets. It's
|
||||||
|
important for subpartitions as it doesn't make sense to check their
|
||||||
|
relative addresses.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
|
||||||
|
---
|
||||||
|
drivers/mtd/mtdpart.c | 13 +++++++++++--
|
||||||
|
1 file changed, 11 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/mtd/mtdpart.c
|
||||||
|
+++ b/drivers/mtd/mtdpart.c
|
||||||
|
@@ -61,6 +61,15 @@ static inline struct mtd_part *mtd_to_pa
|
||||||
|
return container_of(mtd, struct mtd_part, mtd);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static u64 part_absolute_offset(struct mtd_info *mtd)
|
||||||
|
+{
|
||||||
|
+ struct mtd_part *part = mtd_to_part(mtd);
|
||||||
|
+
|
||||||
|
+ if (!mtd_is_partition(mtd))
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ return part_absolute_offset(part->parent) + part->offset;
|
||||||
|
+}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MTD methods which simply translate the effective address and pass through
|
||||||
|
@@ -562,7 +571,7 @@ static struct mtd_part *allocate_partiti
|
||||||
|
if (!(slave->mtd.flags & MTD_NO_ERASE))
|
||||||
|
wr_alignment = slave->mtd.erasesize;
|
||||||
|
|
||||||
|
- tmp = slave->offset;
|
||||||
|
+ tmp = part_absolute_offset(parent) + slave->offset;
|
||||||
|
remainder = do_div(tmp, wr_alignment);
|
||||||
|
if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
|
||||||
|
/* Doesn't start on a boundary of major erase size */
|
||||||
|
@@ -573,7 +582,7 @@ static struct mtd_part *allocate_partiti
|
||||||
|
part->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
- tmp = slave->mtd.size;
|
||||||
|
+ tmp = part_absolute_offset(parent) + slave->mtd.size;
|
||||||
|
remainder = do_div(tmp, wr_alignment);
|
||||||
|
if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
|
||||||
|
slave->mtd.flags &= ~MTD_WRITEABLE;
|
|
@ -0,0 +1,50 @@
|
||||||
|
From 12acd136913ccdf394eeb2bc8686ff5505368119 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Thu, 12 Oct 2017 10:21:26 +0200
|
||||||
|
Subject: [PATCH] net: bgmac: enable master mode for BCM54210E and B50212E PHYs
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
There are 4 very similar PHYs:
|
||||||
|
0x600d84a1: BCM54210E (rev B0)
|
||||||
|
0x600d84a2: BCM54210E (rev B1)
|
||||||
|
0x600d84a5: B50212E (rev B0)
|
||||||
|
0x600d84a6: B50212E (rev B1)
|
||||||
|
that need setting master mode manually. It's because they run in slave
|
||||||
|
mode by default with Automatic Slave/Master configuration disabled which
|
||||||
|
can lead to unreliable connection with massive ping loss.
|
||||||
|
|
||||||
|
So far it was reported for a board with BCM47189 SoC and B50212E B1 PHY
|
||||||
|
connected to the bgmac supported ethernet device. Telling PHY driver to
|
||||||
|
setup PHY properly solves this issue.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
---
|
||||||
|
drivers/net/ethernet/broadcom/bgmac-bcma.c | 8 +++++++-
|
||||||
|
1 file changed, 7 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
|
||||||
|
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
|
||||||
|
@@ -184,13 +184,19 @@ static int bgmac_probe(struct bcma_devic
|
||||||
|
|
||||||
|
if (!bgmac_is_bcm4707_family(core) &&
|
||||||
|
!(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) {
|
||||||
|
+ struct phy_device *phydev;
|
||||||
|
+
|
||||||
|
mii_bus = bcma_mdio_mii_register(bgmac);
|
||||||
|
if (IS_ERR(mii_bus)) {
|
||||||
|
err = PTR_ERR(mii_bus);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
-
|
||||||
|
bgmac->mii_bus = mii_bus;
|
||||||
|
+
|
||||||
|
+ phydev = mdiobus_get_phy(bgmac->mii_bus, bgmac->phyaddr);
|
||||||
|
+ if (ci->id == BCMA_CHIP_ID_BCM53573 && phydev &&
|
||||||
|
+ (phydev->drv->phy_id & phydev->drv->phy_id_mask) == PHY_ID_BCM54210E)
|
||||||
|
+ phydev->dev_flags |= PHY_BRCM_EN_MASTER_MODE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
|
|
@ -0,0 +1,54 @@
|
||||||
|
From 2355a6546a053b1c16ebefd6ce1f0cccc00e1da5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
|
||||||
|
Date: Thu, 12 Oct 2017 10:21:25 +0200
|
||||||
|
Subject: [PATCH] net: phy: broadcom: support new device flag for setting
|
||||||
|
master mode
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Some of Broadcom's PHYs run by default in slave mode with Automatic
|
||||||
|
Slave/Master configuration disabled. It stops them from working properly
|
||||||
|
with some devices.
|
||||||
|
|
||||||
|
So far it has been verified for BCM54210E and BCM50212E which don't
|
||||||
|
work well with Intel's I217-LM and I218-LM:
|
||||||
|
http://ark.intel.com/products/60019/Intel-Ethernet-Connection-I217-LM
|
||||||
|
http://ark.intel.com/products/71307/Intel-Ethernet-Connection-I218-LM
|
||||||
|
I was told there is massive ping loss.
|
||||||
|
|
||||||
|
This commit adds support for a new flag which can be set by an ethernet
|
||||||
|
driver to fixup PHY setup.
|
||||||
|
|
||||||
|
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
---
|
||||||
|
drivers/net/phy/broadcom.c | 6 ++++++
|
||||||
|
include/linux/brcmphy.h | 1 +
|
||||||
|
2 files changed, 7 insertions(+)
|
||||||
|
|
||||||
|
--- a/drivers/net/phy/broadcom.c
|
||||||
|
+++ b/drivers/net/phy/broadcom.c
|
||||||
|
@@ -43,6 +43,12 @@ static int bcm54210e_config_init(struct
|
||||||
|
val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN;
|
||||||
|
bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val);
|
||||||
|
|
||||||
|
+ if (phydev->dev_flags & PHY_BRCM_EN_MASTER_MODE) {
|
||||||
|
+ val = phy_read(phydev, MII_CTRL1000);
|
||||||
|
+ val |= CTL1000_AS_MASTER | CTL1000_ENABLE_MASTER;
|
||||||
|
+ phy_write(phydev, MII_CTRL1000, val);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
--- a/include/linux/brcmphy.h
|
||||||
|
+++ b/include/linux/brcmphy.h
|
||||||
|
@@ -64,6 +64,7 @@
|
||||||
|
#define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000
|
||||||
|
#define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000
|
||||||
|
#define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000
|
||||||
|
+#define PHY_BRCM_EN_MASTER_MODE 0x00010000
|
||||||
|
|
||||||
|
/* Broadcom BCM7xxx specific workarounds */
|
||||||
|
#define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff)
|
|
@ -0,0 +1,84 @@
|
||||||
|
From f11a04464ae57e8db1bb7634547842b43e36a898 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= <jan.kundrat@cesnet.cz>
|
||||||
|
Date: Fri, 22 Dec 2017 22:47:16 +0100
|
||||||
|
Subject: i2c: gpio: Enable working over slow can_sleep GPIOs
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
"Slow" GPIOs (usually those connected over an SPI or an I2C bus) are,
|
||||||
|
well, slow in their operation. It is generally a good idea to avoid
|
||||||
|
using them for time-critical operation, but sometimes the hardware just
|
||||||
|
sucks, and the software has to cope. In addition to that, the I2C bus
|
||||||
|
itself does not actually define any strict timing limits; the bus is
|
||||||
|
free to go all the way down to DC. The timeouts (and therefore the
|
||||||
|
slowest acceptable frequency) are present only in SMBus.
|
||||||
|
|
||||||
|
The `can_sleep` is IMHO a wrong concept to use here. My SPI-to-quad-UART
|
||||||
|
chip (MAX14830) is connected via a 26MHz SPI bus, and it happily drives
|
||||||
|
SCL at 200kHz (5µs pulses) during my benchmarks. That's faster than the
|
||||||
|
maximal allowed speed of the traditional I2C.
|
||||||
|
|
||||||
|
The previous version of this code did not really block operation over
|
||||||
|
slow GPIO pins, anyway. Instead, it just resorted to printing a warning
|
||||||
|
with a backtrace each time a GPIO pin was accessed, thereby slowing
|
||||||
|
things down even more.
|
||||||
|
|
||||||
|
Finally, it's not just me. A similar patch was originally submitted in
|
||||||
|
2015 [1].
|
||||||
|
|
||||||
|
[1] https://patchwork.ozlabs.org/patch/450956/
|
||||||
|
|
||||||
|
Signed-off-by: Jan Kundrát <jan.kundrat@cesnet.cz>
|
||||||
|
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
|
||||||
|
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
|
||||||
|
---
|
||||||
|
drivers/i2c/busses/i2c-gpio.c | 11 +++++++----
|
||||||
|
1 file changed, 7 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/i2c/busses/i2c-gpio.c
|
||||||
|
+++ b/drivers/i2c/busses/i2c-gpio.c
|
||||||
|
@@ -44,7 +44,7 @@ static void i2c_gpio_setsda_val(void *da
|
||||||
|
{
|
||||||
|
struct i2c_gpio_platform_data *pdata = data;
|
||||||
|
|
||||||
|
- gpio_set_value(pdata->sda_pin, state);
|
||||||
|
+ gpio_set_value_cansleep(pdata->sda_pin, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Toggle SCL by changing the direction of the pin. */
|
||||||
|
@@ -68,21 +68,21 @@ static void i2c_gpio_setscl_val(void *da
|
||||||
|
{
|
||||||
|
struct i2c_gpio_platform_data *pdata = data;
|
||||||
|
|
||||||
|
- gpio_set_value(pdata->scl_pin, state);
|
||||||
|
+ gpio_set_value_cansleep(pdata->scl_pin, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int i2c_gpio_getsda(void *data)
|
||||||
|
{
|
||||||
|
struct i2c_gpio_platform_data *pdata = data;
|
||||||
|
|
||||||
|
- return gpio_get_value(pdata->sda_pin);
|
||||||
|
+ return gpio_get_value_cansleep(pdata->sda_pin);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int i2c_gpio_getscl(void *data)
|
||||||
|
{
|
||||||
|
struct i2c_gpio_platform_data *pdata = data;
|
||||||
|
|
||||||
|
- return gpio_get_value(pdata->scl_pin);
|
||||||
|
+ return gpio_get_value_cansleep(pdata->scl_pin);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int of_i2c_gpio_get_pins(struct device_node *np,
|
||||||
|
@@ -175,6 +175,9 @@ static int i2c_gpio_probe(struct platfor
|
||||||
|
memcpy(pdata, dev_get_platdata(&pdev->dev), sizeof(*pdata));
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (gpiod_cansleep(gpio_to_desc(pdata->sda_pin)) || gpiod_cansleep(gpio_to_desc(pdata->scl_pin)))
|
||||||
|
+ dev_warn(&pdev->dev, "Slow GPIO pins might wreak havoc into I2C/SMBus bus timing");
|
||||||
|
+
|
||||||
|
if (pdata->sda_is_open_drain) {
|
||||||
|
gpio_direction_output(pdata->sda_pin, 1);
|
||||||
|
bit_data->setsda = i2c_gpio_setsda_val;
|
|
@ -0,0 +1,145 @@
|
||||||
|
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
|
||||||
|
Date: Thu, 24 May 2018 11:56:48 +0300
|
||||||
|
Subject: [PATCH] net: bridge: add support for port isolation
|
||||||
|
|
||||||
|
This patch adds support for a new port flag - BR_ISOLATED. If it is set
|
||||||
|
then isolated ports cannot communicate between each other, but they can
|
||||||
|
still communicate with non-isolated ports. The same can be achieved via
|
||||||
|
ACLs but they can't scale with large number of ports and also the
|
||||||
|
complexity of the rules grows. This feature can be used to achieve
|
||||||
|
isolated vlan functionality (similar to pvlan) as well, though currently
|
||||||
|
it will be port-wide (for all vlans on the port). The new test in
|
||||||
|
should_deliver uses data that is already cache hot and the new boolean
|
||||||
|
is used to avoid an additional source port test in should_deliver.
|
||||||
|
|
||||||
|
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
|
||||||
|
Reviewed-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/uapi/linux/if_link.h
|
||||||
|
+++ b/include/uapi/linux/if_link.h
|
||||||
|
@@ -326,6 +326,8 @@ enum {
|
||||||
|
IFLA_BRPORT_MCAST_TO_UCAST,
|
||||||
|
IFLA_BRPORT_VLAN_TUNNEL,
|
||||||
|
IFLA_BRPORT_BCAST_FLOOD,
|
||||||
|
+ IFLA_BRPORT_NEIGH_SUPPRESS,
|
||||||
|
+ IFLA_BRPORT_ISOLATED,
|
||||||
|
__IFLA_BRPORT_MAX
|
||||||
|
};
|
||||||
|
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
|
||||||
|
--- a/net/bridge/br_forward.c
|
||||||
|
+++ b/net/bridge/br_forward.c
|
||||||
|
@@ -30,7 +30,8 @@ static inline int should_deliver(const s
|
||||||
|
vg = nbp_vlan_group_rcu(p);
|
||||||
|
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
|
||||||
|
br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING &&
|
||||||
|
- nbp_switchdev_allowed_egress(p, skb);
|
||||||
|
+ nbp_switchdev_allowed_egress(p, skb) &&
|
||||||
|
+ !br_skb_isolated(p, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||||
|
--- a/net/bridge/br_input.c
|
||||||
|
+++ b/net/bridge/br_input.c
|
||||||
|
@@ -170,6 +170,7 @@ int br_handle_frame_finish(struct net *n
|
||||||
|
goto drop;
|
||||||
|
|
||||||
|
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
|
||||||
|
+ BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED);
|
||||||
|
|
||||||
|
if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
|
||||||
|
br_do_proxy_arp(skb, br, vid, p);
|
||||||
|
--- a/net/bridge/br_netlink.c
|
||||||
|
+++ b/net/bridge/br_netlink.c
|
||||||
|
@@ -138,6 +138,7 @@ static inline size_t br_port_info_size(v
|
||||||
|
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
|
||||||
|
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
|
||||||
|
+ nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
|
||||||
|
+ + nla_total_size(1) /* IFLA_BRPORT_ISOLATED */
|
||||||
|
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
|
||||||
|
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
|
||||||
|
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
|
||||||
|
@@ -208,7 +209,8 @@ static int br_port_fill_attrs(struct sk_
|
||||||
|
p->topology_change_ack) ||
|
||||||
|
nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
|
||||||
|
nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
|
||||||
|
- BR_VLAN_TUNNEL)))
|
||||||
|
+ BR_VLAN_TUNNEL)) ||
|
||||||
|
+ nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
|
||||||
|
return -EMSGSIZE;
|
||||||
|
|
||||||
|
timerval = br_timer_value(&p->message_age_timer);
|
||||||
|
@@ -637,6 +639,7 @@ static const struct nla_policy br_port_p
|
||||||
|
[IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
|
||||||
|
[IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
|
||||||
|
[IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
|
||||||
|
+ [IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 },
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Change the state of the port and notify spanning tree */
|
||||||
|
@@ -773,6 +776,11 @@ static int br_setport(struct net_bridge_
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
+
|
||||||
|
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
|
||||||
|
+ if (err)
|
||||||
|
+ return err;
|
||||||
|
+
|
||||||
|
br_port_flags_change(p, old_flags ^ p->flags);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
--- a/net/bridge/br_private.h
|
||||||
|
+++ b/net/bridge/br_private.h
|
||||||
|
@@ -407,6 +407,7 @@ struct br_input_skb_cb {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
bool proxyarp_replied;
|
||||||
|
+ bool src_port_isolated;
|
||||||
|
|
||||||
|
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
|
||||||
|
bool vlan_filtered;
|
||||||
|
@@ -554,6 +555,14 @@ int br_forward_finish(struct net *net, s
|
||||||
|
void br_flood(struct net_bridge *br, struct sk_buff *skb,
|
||||||
|
enum br_pkt_type pkt_type, bool local_rcv, bool local_orig);
|
||||||
|
|
||||||
|
+/* return true if both source port and dest port are isolated */
|
||||||
|
+static inline bool br_skb_isolated(const struct net_bridge_port *to,
|
||||||
|
+ const struct sk_buff *skb)
|
||||||
|
+{
|
||||||
|
+ return BR_INPUT_SKB_CB(skb)->src_port_isolated &&
|
||||||
|
+ (to->flags & BR_ISOLATED);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* br_if.c */
|
||||||
|
void br_port_carrier_check(struct net_bridge_port *p);
|
||||||
|
int br_add_bridge(struct net *net, const char *name);
|
||||||
|
--- a/net/bridge/br_sysfs_if.c
|
||||||
|
+++ b/net/bridge/br_sysfs_if.c
|
||||||
|
@@ -174,6 +174,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
|
||||||
|
BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
|
||||||
|
BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
|
||||||
|
BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
|
||||||
|
+BRPORT_ATTR_FLAG(isolated, BR_ISOLATED);
|
||||||
|
|
||||||
|
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
|
||||||
|
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
|
||||||
|
@@ -223,6 +224,7 @@ static const struct brport_attribute *br
|
||||||
|
&brport_attr_proxyarp_wifi,
|
||||||
|
&brport_attr_multicast_flood,
|
||||||
|
&brport_attr_broadcast_flood,
|
||||||
|
+ &brport_attr_isolated,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/include/linux/if_bridge.h
|
||||||
|
+++ b/include/linux/if_bridge.h
|
||||||
|
@@ -49,6 +49,7 @@ struct br_ip_list {
|
||||||
|
#define BR_MULTICAST_TO_UNICAST BIT(12)
|
||||||
|
#define BR_VLAN_TUNNEL BIT(13)
|
||||||
|
#define BR_BCAST_FLOOD BIT(14)
|
||||||
|
+#define BR_ISOLATED BIT(16)
|
||||||
|
|
||||||
|
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
From 649affd04813c43e0a72886517fcfccd63230981 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Hauke Mehrtens <hauke@hauke-m.de>
|
||||||
|
Date: Mon, 29 Jun 2015 16:53:03 +0200
|
||||||
|
Subject: uapi/if_ether.h: prevent redefinition of struct ethhdr
|
||||||
|
|
||||||
|
Musl provides its own ethhdr struct definition. Add a guard to prevent
|
||||||
|
its definition if the appropriate musl header has already been included.
|
||||||
|
|
||||||
|
glibc does not implement this header, but when glibc will implement this
|
||||||
|
they can just define __UAPI_DEF_ETHHDR 0 to make it work with the
|
||||||
|
kernel.
|
||||||
|
|
||||||
|
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
|
||||||
|
---
|
||||||
|
include/uapi/linux/if_ether.h | 3 +++
|
||||||
|
include/uapi/linux/libc-compat.h | 6 ++++++
|
||||||
|
2 files changed, 9 insertions(+)
|
||||||
|
|
||||||
|
--- a/include/uapi/linux/if_ether.h
|
||||||
|
+++ b/include/uapi/linux/if_ether.h
|
||||||
|
@@ -23,6 +23,7 @@
|
||||||
|
#define _UAPI_LINUX_IF_ETHER_H
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
+#include <linux/libc-compat.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
|
||||||
|
@@ -150,11 +151,13 @@
|
||||||
|
* This is an Ethernet frame header.
|
||||||
|
*/
|
||||||
|
|
||||||
|
+#if __UAPI_DEF_ETHHDR
|
||||||
|
struct ethhdr {
|
||||||
|
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
|
||||||
|
unsigned char h_source[ETH_ALEN]; /* source ether addr */
|
||||||
|
__be16 h_proto; /* packet type ID field */
|
||||||
|
} __attribute__((packed));
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* _UAPI_LINUX_IF_ETHER_H */
|
||||||
|
--- a/include/uapi/linux/libc-compat.h
|
||||||
|
+++ b/include/uapi/linux/libc-compat.h
|
||||||
|
@@ -264,4 +264,10 @@
|
||||||
|
|
||||||
|
#endif /* __GLIBC__ */
|
||||||
|
|
||||||
|
+/* Definitions for if_ether.h */
|
||||||
|
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
|
||||||
|
+#ifndef __UAPI_DEF_ETHHDR
|
||||||
|
+#define __UAPI_DEF_ETHHDR 1
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#endif /* _UAPI_LIBC_COMPAT_H */
|
|
@ -0,0 +1,67 @@
|
||||||
|
From e58f33cc84bc089c430ac955f3cad6380ae98591 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Thu, 7 Dec 2017 16:28:23 +0100
|
||||||
|
Subject: [PATCH] netfilter: add defines for arp/decnet max hooks
|
||||||
|
|
||||||
|
The kernel already has defines for this, but they are in uapi exposed
|
||||||
|
headers.
|
||||||
|
|
||||||
|
Including these from netns.h causes build errors and also adds unneeded
|
||||||
|
dependencies on headers that we don't need.
|
||||||
|
|
||||||
|
So move these defines to netfilter_defs.h and place the uapi ones
|
||||||
|
in ifndef __KERNEL__ to keep them for userspace.
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
include/linux/netfilter_defs.h | 6 ++++++
|
||||||
|
include/uapi/linux/netfilter_arp.h | 3 +++
|
||||||
|
include/uapi/linux/netfilter_decnet.h | 4 +++-
|
||||||
|
3 files changed, 12 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter_defs.h
|
||||||
|
+++ b/include/linux/netfilter_defs.h
|
||||||
|
@@ -7,4 +7,10 @@
|
||||||
|
/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */
|
||||||
|
#define NF_MAX_HOOKS 8
|
||||||
|
|
||||||
|
+/* in/out/forward only */
|
||||||
|
+#define NF_ARP_NUMHOOKS 3
|
||||||
|
+
|
||||||
|
+/* max hook is NF_DN_ROUTE (6), also see uapi/linux/netfilter_decnet.h */
|
||||||
|
+#define NF_DN_NUMHOOKS 7
|
||||||
|
+
|
||||||
|
#endif
|
||||||
|
--- a/include/uapi/linux/netfilter_arp.h
|
||||||
|
+++ b/include/uapi/linux/netfilter_arp.h
|
||||||
|
@@ -15,6 +15,9 @@
|
||||||
|
#define NF_ARP_IN 0
|
||||||
|
#define NF_ARP_OUT 1
|
||||||
|
#define NF_ARP_FORWARD 2
|
||||||
|
+
|
||||||
|
+#ifndef __KERNEL__
|
||||||
|
#define NF_ARP_NUMHOOKS 3
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
#endif /* __LINUX_ARP_NETFILTER_H */
|
||||||
|
--- a/include/uapi/linux/netfilter_decnet.h
|
||||||
|
+++ b/include/uapi/linux/netfilter_decnet.h
|
||||||
|
@@ -24,6 +24,9 @@
|
||||||
|
#define NFC_DN_IF_IN 0x0004
|
||||||
|
/* Output device. */
|
||||||
|
#define NFC_DN_IF_OUT 0x0008
|
||||||
|
+
|
||||||
|
+/* kernel define is in netfilter_defs.h */
|
||||||
|
+#define NF_DN_NUMHOOKS 7
|
||||||
|
#endif /* ! __KERNEL__ */
|
||||||
|
|
||||||
|
/* DECnet Hooks */
|
||||||
|
@@ -41,7 +44,6 @@
|
||||||
|
#define NF_DN_HELLO 5
|
||||||
|
/* Input Routing Packets */
|
||||||
|
#define NF_DN_ROUTE 6
|
||||||
|
-#define NF_DN_NUMHOOKS 7
|
||||||
|
|
||||||
|
enum nf_dn_hook_priorities {
|
||||||
|
NF_DN_PRI_FIRST = INT_MIN,
|
|
@ -0,0 +1,91 @@
|
||||||
|
From 4e645b47c4f000a503b9c90163ad905786b9bc1d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Fri, 1 Dec 2017 00:21:02 +0100
|
||||||
|
Subject: [PATCH 02/11] netfilter: core: make nf_unregister_net_hooks simple
|
||||||
|
wrapper again
|
||||||
|
|
||||||
|
This reverts commit d3ad2c17b4047
|
||||||
|
("netfilter: core: batch nf_unregister_net_hooks synchronize_net calls").
|
||||||
|
|
||||||
|
Nothing wrong with it. However, followup patch will delay freeing of hooks
|
||||||
|
with call_rcu, so all synchronize_net() calls become obsolete and there
|
||||||
|
is no need anymore for this batching.
|
||||||
|
|
||||||
|
This revert causes a temporary performance degradation when destroying
|
||||||
|
network namespace, but it's resolved with the upcoming call_rcu conversion.
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
net/netfilter/core.c | 59 +++-------------------------------------------------
|
||||||
|
1 file changed, 3 insertions(+), 56 deletions(-)
|
||||||
|
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -395,63 +395,10 @@ EXPORT_SYMBOL(nf_register_net_hooks);
|
||||||
|
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
|
||||||
|
unsigned int hookcount)
|
||||||
|
{
|
||||||
|
- struct nf_hook_entries *to_free[16], *p;
|
||||||
|
- struct nf_hook_entries __rcu **pp;
|
||||||
|
- unsigned int i, j, n;
|
||||||
|
+ unsigned int i;
|
||||||
|
|
||||||
|
- mutex_lock(&nf_hook_mutex);
|
||||||
|
- for (i = 0; i < hookcount; i++) {
|
||||||
|
- pp = nf_hook_entry_head(net, &reg[i]);
|
||||||
|
- if (!pp)
|
||||||
|
- continue;
|
||||||
|
-
|
||||||
|
- p = nf_entry_dereference(*pp);
|
||||||
|
- if (WARN_ON_ONCE(!p))
|
||||||
|
- continue;
|
||||||
|
- __nf_unregister_net_hook(p, &reg[i]);
|
||||||
|
- }
|
||||||
|
- mutex_unlock(&nf_hook_mutex);
|
||||||
|
-
|
||||||
|
- do {
|
||||||
|
- n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
|
||||||
|
-
|
||||||
|
- mutex_lock(&nf_hook_mutex);
|
||||||
|
-
|
||||||
|
- for (i = 0, j = 0; i < hookcount && j < n; i++) {
|
||||||
|
- pp = nf_hook_entry_head(net, &reg[i]);
|
||||||
|
- if (!pp)
|
||||||
|
- continue;
|
||||||
|
-
|
||||||
|
- p = nf_entry_dereference(*pp);
|
||||||
|
- if (!p)
|
||||||
|
- continue;
|
||||||
|
-
|
||||||
|
- to_free[j] = __nf_hook_entries_try_shrink(pp);
|
||||||
|
- if (to_free[j])
|
||||||
|
- ++j;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- mutex_unlock(&nf_hook_mutex);
|
||||||
|
-
|
||||||
|
- if (j) {
|
||||||
|
- unsigned int nfq;
|
||||||
|
-
|
||||||
|
- synchronize_net();
|
||||||
|
-
|
||||||
|
- /* need 2nd synchronize_net() if nfqueue is used, skb
|
||||||
|
- * can get reinjected right before nf_queue_hook_drop()
|
||||||
|
- */
|
||||||
|
- nfq = nf_queue_nf_hook_drop(net);
|
||||||
|
- if (nfq)
|
||||||
|
- synchronize_net();
|
||||||
|
-
|
||||||
|
- for (i = 0; i < j; i++)
|
||||||
|
- kvfree(to_free[i]);
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- reg += n;
|
||||||
|
- hookcount -= n;
|
||||||
|
- } while (hookcount > 0);
|
||||||
|
+ for (i = 0; i < hookcount; i++)
|
||||||
|
+ nf_unregister_net_hook(net, &reg[i]);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(nf_unregister_net_hooks);
|
||||||
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
From 26888dfd7e7454686b8d3ea9ba5045d5f236e4d7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Fri, 1 Dec 2017 00:21:03 +0100
|
||||||
|
Subject: [PATCH 03/11] netfilter: core: remove synchronize_net call if nfqueue
|
||||||
|
is used
|
||||||
|
|
||||||
|
since commit 960632ece6949b ("netfilter: convert hook list to an array")
|
||||||
|
nfqueue no longer stores a pointer to the hook that caused the packet
|
||||||
|
to be queued. Therefore no extra synchronize_net() call is needed after
|
||||||
|
dropping the packets enqueued by the old rule blob.
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
include/net/netfilter/nf_queue.h | 2 +-
|
||||||
|
net/netfilter/core.c | 6 +-----
|
||||||
|
net/netfilter/nf_internals.h | 2 +-
|
||||||
|
net/netfilter/nf_queue.c | 7 ++-----
|
||||||
|
net/netfilter/nfnetlink_queue.c | 9 ++-------
|
||||||
|
5 files changed, 7 insertions(+), 19 deletions(-)
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_queue.h
|
||||||
|
+++ b/include/net/netfilter/nf_queue.h
|
||||||
|
@@ -25,7 +25,7 @@ struct nf_queue_entry {
|
||||||
|
struct nf_queue_handler {
|
||||||
|
int (*outfn)(struct nf_queue_entry *entry,
|
||||||
|
unsigned int queuenum);
|
||||||
|
- unsigned int (*nf_hook_drop)(struct net *net);
|
||||||
|
+ void (*nf_hook_drop)(struct net *net);
|
||||||
|
};
|
||||||
|
|
||||||
|
void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -341,7 +341,6 @@ void nf_unregister_net_hook(struct net *
|
||||||
|
{
|
||||||
|
struct nf_hook_entries __rcu **pp;
|
||||||
|
struct nf_hook_entries *p;
|
||||||
|
- unsigned int nfq;
|
||||||
|
|
||||||
|
pp = nf_hook_entry_head(net, reg);
|
||||||
|
if (!pp)
|
||||||
|
@@ -364,10 +363,7 @@ void nf_unregister_net_hook(struct net *
|
||||||
|
|
||||||
|
synchronize_net();
|
||||||
|
|
||||||
|
- /* other cpu might still process nfqueue verdict that used reg */
|
||||||
|
- nfq = nf_queue_nf_hook_drop(net);
|
||||||
|
- if (nfq)
|
||||||
|
- synchronize_net();
|
||||||
|
+ nf_queue_nf_hook_drop(net);
|
||||||
|
kvfree(p);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(nf_unregister_net_hook);
|
||||||
|
--- a/net/netfilter/nf_internals.h
|
||||||
|
+++ b/net/netfilter/nf_internals.h
|
||||||
|
@@ -10,7 +10,7 @@
|
||||||
|
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
|
||||||
|
const struct nf_hook_entries *entries, unsigned int index,
|
||||||
|
unsigned int verdict);
|
||||||
|
-unsigned int nf_queue_nf_hook_drop(struct net *net);
|
||||||
|
+void nf_queue_nf_hook_drop(struct net *net);
|
||||||
|
|
||||||
|
/* nf_log.c */
|
||||||
|
int __init netfilter_log_init(void);
|
||||||
|
--- a/net/netfilter/nf_queue.c
|
||||||
|
+++ b/net/netfilter/nf_queue.c
|
||||||
|
@@ -96,18 +96,15 @@ void nf_queue_entry_get_refs(struct nf_q
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
|
||||||
|
|
||||||
|
-unsigned int nf_queue_nf_hook_drop(struct net *net)
|
||||||
|
+void nf_queue_nf_hook_drop(struct net *net)
|
||||||
|
{
|
||||||
|
const struct nf_queue_handler *qh;
|
||||||
|
- unsigned int count = 0;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
qh = rcu_dereference(net->nf.queue_handler);
|
||||||
|
if (qh)
|
||||||
|
- count = qh->nf_hook_drop(net);
|
||||||
|
+ qh->nf_hook_drop(net);
|
||||||
|
rcu_read_unlock();
|
||||||
|
-
|
||||||
|
- return count;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
|
||||||
|
|
||||||
|
--- a/net/netfilter/nfnetlink_queue.c
|
||||||
|
+++ b/net/netfilter/nfnetlink_queue.c
|
||||||
|
@@ -941,23 +941,18 @@ static struct notifier_block nfqnl_dev_n
|
||||||
|
.notifier_call = nfqnl_rcv_dev_event,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static unsigned int nfqnl_nf_hook_drop(struct net *net)
|
||||||
|
+static void nfqnl_nf_hook_drop(struct net *net)
|
||||||
|
{
|
||||||
|
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
||||||
|
- unsigned int instances = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < INSTANCE_BUCKETS; i++) {
|
||||||
|
struct nfqnl_instance *inst;
|
||||||
|
struct hlist_head *head = &q->instance_table[i];
|
||||||
|
|
||||||
|
- hlist_for_each_entry_rcu(inst, head, hlist) {
|
||||||
|
+ hlist_for_each_entry_rcu(inst, head, hlist)
|
||||||
|
nfqnl_flush(inst, NULL, 0);
|
||||||
|
- instances++;
|
||||||
|
- }
|
||||||
|
}
|
||||||
|
-
|
||||||
|
- return instances;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
|
@ -0,0 +1,132 @@
|
||||||
|
From 8c873e2199700c2de7dbd5eedb9d90d5f109462b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Fri, 1 Dec 2017 00:21:04 +0100
|
||||||
|
Subject: [PATCH 04/11] netfilter: core: free hooks with call_rcu
|
||||||
|
|
||||||
|
Giuseppe Scrivano says:
|
||||||
|
"SELinux, if enabled, registers for each new network namespace 6
|
||||||
|
netfilter hooks."
|
||||||
|
|
||||||
|
Cost for this is high. With synchronize_net() removed:
|
||||||
|
"The net benefit on an SMP machine with two cores is that creating a
|
||||||
|
new network namespace takes -40% of the original time."
|
||||||
|
|
||||||
|
This patch replaces synchronize_net+kvfree with call_rcu().
|
||||||
|
We store rcu_head at the tail of a structure that has no fixed layout,
|
||||||
|
i.e. we cannot use offsetof() to compute the start of the original
|
||||||
|
allocation. Thus store this information right after the rcu head.
|
||||||
|
|
||||||
|
We could simplify this by just placing the rcu_head at the start
|
||||||
|
of struct nf_hook_entries. However, this structure is used in
|
||||||
|
packet processing hotpath, so only place what is needed for that
|
||||||
|
at the beginning of the struct.
|
||||||
|
|
||||||
|
Reported-by: Giuseppe Scrivano <gscrivan@redhat.com>
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
include/linux/netfilter.h | 19 +++++++++++++++----
|
||||||
|
net/netfilter/core.c | 34 ++++++++++++++++++++++++++++------
|
||||||
|
2 files changed, 43 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -77,17 +77,28 @@ struct nf_hook_entry {
|
||||||
|
void *priv;
|
||||||
|
};
|
||||||
|
|
||||||
|
+struct nf_hook_entries_rcu_head {
|
||||||
|
+ struct rcu_head head;
|
||||||
|
+ void *allocation;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
struct nf_hook_entries {
|
||||||
|
u16 num_hook_entries;
|
||||||
|
/* padding */
|
||||||
|
struct nf_hook_entry hooks[];
|
||||||
|
|
||||||
|
- /* trailer: pointers to original orig_ops of each hook.
|
||||||
|
- *
|
||||||
|
- * This is not part of struct nf_hook_entry since its only
|
||||||
|
- * needed in slow path (hook register/unregister).
|
||||||
|
+ /* trailer: pointers to original orig_ops of each hook,
|
||||||
|
+ * followed by rcu_head and scratch space used for freeing
|
||||||
|
+ * the structure via call_rcu.
|
||||||
|
*
|
||||||
|
+ * This is not part of struct nf_hook_entry since its only
|
||||||
|
+ * needed in slow path (hook register/unregister):
|
||||||
|
* const struct nf_hook_ops *orig_ops[]
|
||||||
|
+ *
|
||||||
|
+ * For the same reason, we store this at end -- its
|
||||||
|
+ * only needed when a hook is deleted, not during
|
||||||
|
+ * packet path processing:
|
||||||
|
+ * struct nf_hook_entries_rcu_head head
|
||||||
|
*/
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -74,7 +74,8 @@ static struct nf_hook_entries *allocate_
|
||||||
|
struct nf_hook_entries *e;
|
||||||
|
size_t alloc = sizeof(*e) +
|
||||||
|
sizeof(struct nf_hook_entry) * num +
|
||||||
|
- sizeof(struct nf_hook_ops *) * num;
|
||||||
|
+ sizeof(struct nf_hook_ops *) * num +
|
||||||
|
+ sizeof(struct nf_hook_entries_rcu_head);
|
||||||
|
|
||||||
|
if (num == 0)
|
||||||
|
return NULL;
|
||||||
|
@@ -85,6 +86,30 @@ static struct nf_hook_entries *allocate_
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void __nf_hook_entries_free(struct rcu_head *h)
|
||||||
|
+{
|
||||||
|
+ struct nf_hook_entries_rcu_head *head;
|
||||||
|
+
|
||||||
|
+ head = container_of(h, struct nf_hook_entries_rcu_head, head);
|
||||||
|
+ kvfree(head->allocation);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_hook_entries_free(struct nf_hook_entries *e)
|
||||||
|
+{
|
||||||
|
+ struct nf_hook_entries_rcu_head *head;
|
||||||
|
+ struct nf_hook_ops **ops;
|
||||||
|
+ unsigned int num;
|
||||||
|
+
|
||||||
|
+ if (!e)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ num = e->num_hook_entries;
|
||||||
|
+ ops = nf_hook_entries_get_hook_ops(e);
|
||||||
|
+ head = (void *)&ops[num];
|
||||||
|
+ head->allocation = e;
|
||||||
|
+ call_rcu(&head->head, __nf_hook_entries_free);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static unsigned int accept_all(void *priv,
|
||||||
|
struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
@@ -291,9 +316,8 @@ int nf_register_net_hook(struct net *net
|
||||||
|
#ifdef HAVE_JUMP_LABEL
|
||||||
|
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
|
||||||
|
#endif
|
||||||
|
- synchronize_net();
|
||||||
|
BUG_ON(p == new_hooks);
|
||||||
|
- kvfree(p);
|
||||||
|
+ nf_hook_entries_free(p);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(nf_register_net_hook);
|
||||||
|
@@ -361,10 +385,8 @@ void nf_unregister_net_hook(struct net *
|
||||||
|
if (!p)
|
||||||
|
return;
|
||||||
|
|
||||||
|
- synchronize_net();
|
||||||
|
-
|
||||||
|
nf_queue_nf_hook_drop(net);
|
||||||
|
- kvfree(p);
|
||||||
|
+ nf_hook_entries_free(p);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(nf_unregister_net_hook);
|
||||||
|
|
|
@ -0,0 +1,200 @@
|
||||||
|
From b0f38338aef2dae5ade3c16acf713737e3b15a73 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Sun, 3 Dec 2017 00:58:47 +0100
|
||||||
|
Subject: [PATCH 05/11] netfilter: reduce size of hook entry point locations
|
||||||
|
|
||||||
|
struct net contains:
|
||||||
|
|
||||||
|
struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
||||||
|
|
||||||
|
which store the hook entry point locations for the various protocol
|
||||||
|
families and the hooks.
|
||||||
|
|
||||||
|
Using array results in compact c code when doing accesses, i.e.
|
||||||
|
x = rcu_dereference(net->nf.hooks[pf][hook]);
|
||||||
|
|
||||||
|
but it's also wasting a lot of memory, as most families are
|
||||||
|
not used.
|
||||||
|
|
||||||
|
So split the array into those families that are used, which
|
||||||
|
are only 5 (instead of 13). In most cases, the 'pf' argument is
|
||||||
|
constant, i.e. gcc removes switch statement.
|
||||||
|
|
||||||
|
struct net before:
|
||||||
|
/* size: 5184, cachelines: 81, members: 46 */
|
||||||
|
after:
|
||||||
|
/* size: 4672, cachelines: 73, members: 46 */
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
include/linux/netfilter.h | 24 ++++++++++++++++++++++--
|
||||||
|
include/net/netns/netfilter.h | 6 +++++-
|
||||||
|
net/bridge/br_netfilter_hooks.c | 2 +-
|
||||||
|
net/netfilter/core.c | 38 ++++++++++++++++++++++++++++++--------
|
||||||
|
net/netfilter/nf_queue.c | 21 +++++++++++++++++++--
|
||||||
|
5 files changed, 77 insertions(+), 14 deletions(-)
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -195,7 +195,7 @@ static inline int nf_hook(u_int8_t pf, u
|
||||||
|
struct net_device *indev, struct net_device *outdev,
|
||||||
|
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
|
||||||
|
{
|
||||||
|
- struct nf_hook_entries *hook_head;
|
||||||
|
+ struct nf_hook_entries *hook_head = NULL;
|
||||||
|
int ret = 1;
|
||||||
|
|
||||||
|
#ifdef HAVE_JUMP_LABEL
|
||||||
|
@@ -206,7 +206,27 @@ static inline int nf_hook(u_int8_t pf, u
|
||||||
|
#endif
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
- hook_head = rcu_dereference(net->nf.hooks[pf][hook]);
|
||||||
|
+ switch (pf) {
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_ARP:
|
||||||
|
+ hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_BRIDGE:
|
||||||
|
+ hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_DECNET:
|
||||||
|
+ hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ WARN_ON_ONCE(1);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (hook_head) {
|
||||||
|
struct nf_hook_state state;
|
||||||
|
|
||||||
|
--- a/include/net/netns/netfilter.h
|
||||||
|
+++ b/include/net/netns/netfilter.h
|
||||||
|
@@ -17,7 +17,11 @@ struct netns_nf {
|
||||||
|
#ifdef CONFIG_SYSCTL
|
||||||
|
struct ctl_table_header *nf_log_dir_header;
|
||||||
|
#endif
|
||||||
|
- struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_ipv4[NF_MAX_HOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_ipv6[NF_MAX_HOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_arp[NF_MAX_HOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_bridge[NF_MAX_HOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_decnet[NF_MAX_HOOKS];
|
||||||
|
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
|
||||||
|
bool defrag_ipv4;
|
||||||
|
#endif
|
||||||
|
--- a/net/bridge/br_netfilter_hooks.c
|
||||||
|
+++ b/net/bridge/br_netfilter_hooks.c
|
||||||
|
@@ -992,7 +992,7 @@ int br_nf_hook_thresh(unsigned int hook,
|
||||||
|
unsigned int i;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
|
||||||
|
+ e = rcu_dereference(net->nf.hooks_bridge[hook]);
|
||||||
|
if (!e)
|
||||||
|
return okfn(net, sk, skb);
|
||||||
|
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -264,8 +264,23 @@ out_assign:
|
||||||
|
|
||||||
|
static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
|
||||||
|
{
|
||||||
|
- if (reg->pf != NFPROTO_NETDEV)
|
||||||
|
- return net->nf.hooks[reg->pf]+reg->hooknum;
|
||||||
|
+ switch (reg->pf) {
|
||||||
|
+ case NFPROTO_NETDEV:
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_ARP:
|
||||||
|
+ return net->nf.hooks_arp + reg->hooknum;
|
||||||
|
+ case NFPROTO_BRIDGE:
|
||||||
|
+ return net->nf.hooks_bridge + reg->hooknum;
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ return net->nf.hooks_ipv4 + reg->hooknum;
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ return net->nf.hooks_ipv6 + reg->hooknum;
|
||||||
|
+ case NFPROTO_DECNET:
|
||||||
|
+ return net->nf.hooks_decnet + reg->hooknum;
|
||||||
|
+ default:
|
||||||
|
+ WARN_ON_ONCE(1);
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
#ifdef CONFIG_NETFILTER_INGRESS
|
||||||
|
if (reg->hooknum == NF_NETDEV_INGRESS) {
|
||||||
|
@@ -534,14 +549,21 @@ void (*nf_nat_decode_session_hook)(struc
|
||||||
|
EXPORT_SYMBOL(nf_nat_decode_session_hook);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-static int __net_init netfilter_net_init(struct net *net)
|
||||||
|
+static void __net_init __netfilter_net_init(struct nf_hook_entries *e[NF_MAX_HOOKS])
|
||||||
|
{
|
||||||
|
- int i, h;
|
||||||
|
+ int h;
|
||||||
|
|
||||||
|
- for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
|
||||||
|
- for (h = 0; h < NF_MAX_HOOKS; h++)
|
||||||
|
- RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
|
||||||
|
- }
|
||||||
|
+ for (h = 0; h < NF_MAX_HOOKS; h++)
|
||||||
|
+ RCU_INIT_POINTER(e[h], NULL);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int __net_init netfilter_net_init(struct net *net)
|
||||||
|
+{
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_ipv4);
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_ipv6);
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_arp);
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_bridge);
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_decnet);
|
||||||
|
|
||||||
|
#ifdef CONFIG_PROC_FS
|
||||||
|
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
|
||||||
|
--- a/net/netfilter/nf_queue.c
|
||||||
|
+++ b/net/netfilter/nf_queue.c
|
||||||
|
@@ -201,6 +201,23 @@ repeat:
|
||||||
|
return NF_ACCEPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
|
||||||
|
+{
|
||||||
|
+ switch (pf) {
|
||||||
|
+ case NFPROTO_BRIDGE:
|
||||||
|
+ return rcu_dereference(net->nf.hooks_bridge[hooknum]);
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
|
||||||
|
+ default:
|
||||||
|
+ WARN_ON_ONCE(1);
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Caller must hold rcu read-side lock */
|
||||||
|
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
|
||||||
|
{
|
||||||
|
@@ -216,12 +233,12 @@ void nf_reinject(struct nf_queue_entry *
|
||||||
|
net = entry->state.net;
|
||||||
|
pf = entry->state.pf;
|
||||||
|
|
||||||
|
- hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
|
||||||
|
+ hooks = nf_hook_entries_head(net, pf, entry->state.hook);
|
||||||
|
|
||||||
|
nf_queue_entry_release_refs(entry);
|
||||||
|
|
||||||
|
i = entry->hook_index;
|
||||||
|
- if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
|
||||||
|
+ if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
|
||||||
|
kfree_skb(skb);
|
||||||
|
kfree(entry);
|
||||||
|
return;
|
|
@@ -0,0 +1,95 @@
|
||||||
|
From ef57170bbfdd6958281011332b1fd237712f69f0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Thu, 7 Dec 2017 16:28:24 +0100
|
||||||
|
Subject: [PATCH 06/11] netfilter: reduce hook array sizes to what is needed
|
||||||
|
|
||||||
|
Not all families share the same hook count, adjust sizes to what is
|
||||||
|
needed.
|
||||||
|
|
||||||
|
struct net before:
|
||||||
|
/* size: 6592, cachelines: 103, members: 46 */
|
||||||
|
after:
|
||||||
|
/* size: 5952, cachelines: 93, members: 46 */
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
include/net/netns/netfilter.h | 10 +++++-----
|
||||||
|
net/netfilter/core.c | 24 +++++++++++++++++-------
|
||||||
|
2 files changed, 22 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
--- a/include/net/netns/netfilter.h
|
||||||
|
+++ b/include/net/netns/netfilter.h
|
||||||
|
@@ -17,11 +17,11 @@ struct netns_nf {
|
||||||
|
#ifdef CONFIG_SYSCTL
|
||||||
|
struct ctl_table_header *nf_log_dir_header;
|
||||||
|
#endif
|
||||||
|
- struct nf_hook_entries __rcu *hooks_ipv4[NF_MAX_HOOKS];
|
||||||
|
- struct nf_hook_entries __rcu *hooks_ipv6[NF_MAX_HOOKS];
|
||||||
|
- struct nf_hook_entries __rcu *hooks_arp[NF_MAX_HOOKS];
|
||||||
|
- struct nf_hook_entries __rcu *hooks_bridge[NF_MAX_HOOKS];
|
||||||
|
- struct nf_hook_entries __rcu *hooks_decnet[NF_MAX_HOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
|
||||||
|
+ struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
|
||||||
|
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
|
||||||
|
bool defrag_ipv4;
|
||||||
|
#endif
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -268,14 +268,24 @@ static struct nf_hook_entries __rcu **nf
|
||||||
|
case NFPROTO_NETDEV:
|
||||||
|
break;
|
||||||
|
case NFPROTO_ARP:
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= reg->hooknum))
|
||||||
|
+ return NULL;
|
||||||
|
return net->nf.hooks_arp + reg->hooknum;
|
||||||
|
case NFPROTO_BRIDGE:
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= reg->hooknum))
|
||||||
|
+ return NULL;
|
||||||
|
return net->nf.hooks_bridge + reg->hooknum;
|
||||||
|
case NFPROTO_IPV4:
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= reg->hooknum))
|
||||||
|
+ return NULL;
|
||||||
|
return net->nf.hooks_ipv4 + reg->hooknum;
|
||||||
|
case NFPROTO_IPV6:
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= reg->hooknum))
|
||||||
|
+ return NULL;
|
||||||
|
return net->nf.hooks_ipv6 + reg->hooknum;
|
||||||
|
case NFPROTO_DECNET:
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= reg->hooknum))
|
||||||
|
+ return NULL;
|
||||||
|
return net->nf.hooks_decnet + reg->hooknum;
|
||||||
|
default:
|
||||||
|
WARN_ON_ONCE(1);
|
||||||
|
@@ -549,21 +559,21 @@ void (*nf_nat_decode_session_hook)(struc
|
||||||
|
EXPORT_SYMBOL(nf_nat_decode_session_hook);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-static void __net_init __netfilter_net_init(struct nf_hook_entries *e[NF_MAX_HOOKS])
|
||||||
|
+static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max)
|
||||||
|
{
|
||||||
|
int h;
|
||||||
|
|
||||||
|
- for (h = 0; h < NF_MAX_HOOKS; h++)
|
||||||
|
+ for (h = 0; h < max; h++)
|
||||||
|
RCU_INIT_POINTER(e[h], NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __net_init netfilter_net_init(struct net *net)
|
||||||
|
{
|
||||||
|
- __netfilter_net_init(net->nf.hooks_ipv4);
|
||||||
|
- __netfilter_net_init(net->nf.hooks_ipv6);
|
||||||
|
- __netfilter_net_init(net->nf.hooks_arp);
|
||||||
|
- __netfilter_net_init(net->nf.hooks_bridge);
|
||||||
|
- __netfilter_net_init(net->nf.hooks_decnet);
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
|
||||||
|
+ __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
|
||||||
|
|
||||||
|
#ifdef CONFIG_PROC_FS
|
||||||
|
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
|
|
@@ -0,0 +1,67 @@
|
||||||
|
From bb4badf3a3dc81190f7c1c1fa063cdefb18df45f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Thu, 7 Dec 2017 16:28:25 +0100
|
||||||
|
Subject: [PATCH 07/11] netfilter: don't allocate space for decnet hooks unless
|
||||||
|
needed
|
||||||
|
|
||||||
|
no need to define hook points if the family isn't supported.
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
include/linux/netfilter.h | 2 ++
|
||||||
|
include/net/netns/netfilter.h | 2 ++
|
||||||
|
net/netfilter/core.c | 4 ++++
|
||||||
|
3 files changed, 8 insertions(+)
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -219,9 +219,11 @@ static inline int nf_hook(u_int8_t pf, u
|
||||||
|
case NFPROTO_BRIDGE:
|
||||||
|
hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
|
||||||
|
break;
|
||||||
|
+#if IS_ENABLED(CONFIG_DECNET)
|
||||||
|
case NFPROTO_DECNET:
|
||||||
|
hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
|
||||||
|
break;
|
||||||
|
+#endif
|
||||||
|
default:
|
||||||
|
WARN_ON_ONCE(1);
|
||||||
|
break;
|
||||||
|
--- a/include/net/netns/netfilter.h
|
||||||
|
+++ b/include/net/netns/netfilter.h
|
||||||
|
@@ -21,7 +21,9 @@ struct netns_nf {
|
||||||
|
struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
|
||||||
|
struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS];
|
||||||
|
struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
|
||||||
|
+#if IS_ENABLED(CONFIG_DECNET)
|
||||||
|
struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
|
||||||
|
+#endif
|
||||||
|
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
|
||||||
|
bool defrag_ipv4;
|
||||||
|
#endif
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -283,10 +283,12 @@ static struct nf_hook_entries __rcu **nf
|
||||||
|
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= reg->hooknum))
|
||||||
|
return NULL;
|
||||||
|
return net->nf.hooks_ipv6 + reg->hooknum;
|
||||||
|
+#if IS_ENABLED(CONFIG_DECNET)
|
||||||
|
case NFPROTO_DECNET:
|
||||||
|
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= reg->hooknum))
|
||||||
|
return NULL;
|
||||||
|
return net->nf.hooks_decnet + reg->hooknum;
|
||||||
|
+#endif
|
||||||
|
default:
|
||||||
|
WARN_ON_ONCE(1);
|
||||||
|
return NULL;
|
||||||
|
@@ -573,7 +575,9 @@ static int __net_init netfilter_net_init
|
||||||
|
__netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
|
||||||
|
__netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
|
||||||
|
__netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
|
||||||
|
+#if IS_ENABLED(CONFIG_DECNET)
|
||||||
|
__netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_PROC_FS
|
||||||
|
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
|
|
@@ -0,0 +1,165 @@
|
||||||
|
From 2a95183a5e0375df756efb2ca37602d71e8455f9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Thu, 7 Dec 2017 16:28:26 +0100
|
||||||
|
Subject: [PATCH 08/11] netfilter: don't allocate space for arp/bridge hooks
|
||||||
|
unless needed
|
||||||
|
|
||||||
|
no need to define hook points if the family isn't supported.
|
||||||
|
Because we need these hooks for either nftables, arp/ebtables
|
||||||
|
or the 'call-iptables' hack we have in the bridge layer add two
|
||||||
|
new dependencies, NETFILTER_FAMILY_{ARP,BRIDGE}, and have the
|
||||||
|
users select them.
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
include/linux/netfilter.h | 4 ++++
|
||||||
|
include/net/netns/netfilter.h | 4 ++++
|
||||||
|
net/Kconfig | 1 +
|
||||||
|
net/bridge/netfilter/Kconfig | 2 ++
|
||||||
|
net/ipv4/netfilter/Kconfig | 2 ++
|
||||||
|
net/netfilter/Kconfig | 6 ++++++
|
||||||
|
net/netfilter/core.c | 8 ++++++++
|
||||||
|
net/netfilter/nf_queue.c | 2 ++
|
||||||
|
8 files changed, 29 insertions(+)
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -214,10 +214,14 @@ static inline int nf_hook(u_int8_t pf, u
|
||||||
|
hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
|
||||||
|
break;
|
||||||
|
case NFPROTO_ARP:
|
||||||
|
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
|
||||||
|
hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
|
||||||
|
+#endif
|
||||||
|
break;
|
||||||
|
case NFPROTO_BRIDGE:
|
||||||
|
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
|
||||||
|
hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
|
||||||
|
+#endif
|
||||||
|
break;
|
||||||
|
#if IS_ENABLED(CONFIG_DECNET)
|
||||||
|
case NFPROTO_DECNET:
|
||||||
|
--- a/include/net/netns/netfilter.h
|
||||||
|
+++ b/include/net/netns/netfilter.h
|
||||||
|
@@ -19,8 +19,12 @@ struct netns_nf {
|
||||||
|
#endif
|
||||||
|
struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS];
|
||||||
|
struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
|
||||||
|
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
|
||||||
|
struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS];
|
||||||
|
+#endif
|
||||||
|
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
|
||||||
|
struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
|
||||||
|
+#endif
|
||||||
|
#if IS_ENABLED(CONFIG_DECNET)
|
||||||
|
struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
|
||||||
|
#endif
|
||||||
|
--- a/net/Kconfig
|
||||||
|
+++ b/net/Kconfig
|
||||||
|
@@ -182,6 +182,7 @@ config BRIDGE_NETFILTER
|
||||||
|
depends on BRIDGE
|
||||||
|
depends on NETFILTER && INET
|
||||||
|
depends on NETFILTER_ADVANCED
|
||||||
|
+ select NETFILTER_FAMILY_BRIDGE
|
||||||
|
default m
|
||||||
|
---help---
|
||||||
|
Enabling this option will let arptables resp. iptables see bridged
|
||||||
|
--- a/net/bridge/netfilter/Kconfig
|
||||||
|
+++ b/net/bridge/netfilter/Kconfig
|
||||||
|
@@ -4,6 +4,7 @@
|
||||||
|
#
|
||||||
|
menuconfig NF_TABLES_BRIDGE
|
||||||
|
depends on BRIDGE && NETFILTER && NF_TABLES
|
||||||
|
+ select NETFILTER_FAMILY_BRIDGE
|
||||||
|
tristate "Ethernet Bridge nf_tables support"
|
||||||
|
|
||||||
|
if NF_TABLES_BRIDGE
|
||||||
|
@@ -29,6 +30,7 @@ endif # NF_TABLES_BRIDGE
|
||||||
|
menuconfig BRIDGE_NF_EBTABLES
|
||||||
|
tristate "Ethernet Bridge tables (ebtables) support"
|
||||||
|
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
|
||||||
|
+ select NETFILTER_FAMILY_BRIDGE
|
||||||
|
help
|
||||||
|
ebtables is a general, extensible frame/packet identification
|
||||||
|
framework. Say 'Y' or 'M' here if you want to do Ethernet
|
||||||
|
--- a/net/ipv4/netfilter/Kconfig
|
||||||
|
+++ b/net/ipv4/netfilter/Kconfig
|
||||||
|
@@ -72,6 +72,7 @@ endif # NF_TABLES_IPV4
|
||||||
|
|
||||||
|
config NF_TABLES_ARP
|
||||||
|
tristate "ARP nf_tables support"
|
||||||
|
+ select NETFILTER_FAMILY_ARP
|
||||||
|
help
|
||||||
|
This option enables the ARP support for nf_tables.
|
||||||
|
|
||||||
|
@@ -392,6 +393,7 @@ endif # IP_NF_IPTABLES
|
||||||
|
config IP_NF_ARPTABLES
|
||||||
|
tristate "ARP tables support"
|
||||||
|
select NETFILTER_XTABLES
|
||||||
|
+ select NETFILTER_FAMILY_ARP
|
||||||
|
depends on NETFILTER_ADVANCED
|
||||||
|
help
|
||||||
|
arptables is a general, extensible packet identification framework.
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -12,6 +12,12 @@ config NETFILTER_INGRESS
|
||||||
|
config NETFILTER_NETLINK
|
||||||
|
tristate
|
||||||
|
|
||||||
|
+config NETFILTER_FAMILY_BRIDGE
|
||||||
|
+ bool
|
||||||
|
+
|
||||||
|
+config NETFILTER_FAMILY_ARP
|
||||||
|
+ bool
|
||||||
|
+
|
||||||
|
config NETFILTER_NETLINK_ACCT
|
||||||
|
tristate "Netfilter NFACCT over NFNETLINK interface"
|
||||||
|
depends on NETFILTER_ADVANCED
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -267,14 +267,18 @@ static struct nf_hook_entries __rcu **nf
|
||||||
|
switch (reg->pf) {
|
||||||
|
case NFPROTO_NETDEV:
|
||||||
|
break;
|
||||||
|
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
|
||||||
|
case NFPROTO_ARP:
|
||||||
|
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= reg->hooknum))
|
||||||
|
return NULL;
|
||||||
|
return net->nf.hooks_arp + reg->hooknum;
|
||||||
|
+#endif
|
||||||
|
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
|
||||||
|
case NFPROTO_BRIDGE:
|
||||||
|
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= reg->hooknum))
|
||||||
|
return NULL;
|
||||||
|
return net->nf.hooks_bridge + reg->hooknum;
|
||||||
|
+#endif
|
||||||
|
case NFPROTO_IPV4:
|
||||||
|
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= reg->hooknum))
|
||||||
|
return NULL;
|
||||||
|
@@ -573,8 +577,12 @@ static int __net_init netfilter_net_init
|
||||||
|
{
|
||||||
|
__netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
|
||||||
|
__netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
|
||||||
|
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
|
||||||
|
__netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
|
||||||
|
+#endif
|
||||||
|
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
|
||||||
|
__netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
|
||||||
|
+#endif
|
||||||
|
#if IS_ENABLED(CONFIG_DECNET)
|
||||||
|
__netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
|
||||||
|
#endif
|
||||||
|
--- a/net/netfilter/nf_queue.c
|
||||||
|
+++ b/net/netfilter/nf_queue.c
|
||||||
|
@@ -204,8 +204,10 @@ repeat:
|
||||||
|
static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
|
||||||
|
{
|
||||||
|
switch (pf) {
|
||||||
|
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
|
||||||
|
case NFPROTO_BRIDGE:
|
||||||
|
return rcu_dereference(net->nf.hooks_bridge[hooknum]);
|
||||||
|
+#endif
|
||||||
|
case NFPROTO_IPV4:
|
||||||
|
return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
|
||||||
|
case NFPROTO_IPV6:
|
|
@@ -0,0 +1,98 @@
|
||||||
|
From 62a0fe46e2aaba1812d3cbcae014a41539f9eb09 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sat, 9 Dec 2017 15:23:51 +0100
|
||||||
|
Subject: [PATCH 09/11] netfilter: core: pass hook number, family and device to
|
||||||
|
nf_find_hook_list()
|
||||||
|
|
||||||
|
Instead of passing struct nf_hook_ops, this is needed by follow up
|
||||||
|
patches to handle NFPROTO_INET from the core.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
net/netfilter/core.c | 36 +++++++++++++++++++-----------------
|
||||||
|
1 file changed, 19 insertions(+), 17 deletions(-)
|
||||||
|
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -262,36 +262,38 @@ out_assign:
|
||||||
|
return old;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
|
||||||
|
+static struct nf_hook_entries __rcu **
|
||||||
|
+nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
|
||||||
|
+ struct net_device *dev)
|
||||||
|
{
|
||||||
|
- switch (reg->pf) {
|
||||||
|
+ switch (pf) {
|
||||||
|
case NFPROTO_NETDEV:
|
||||||
|
break;
|
||||||
|
#ifdef CONFIG_NETFILTER_FAMILY_ARP
|
||||||
|
case NFPROTO_ARP:
|
||||||
|
- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= reg->hooknum))
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
|
||||||
|
return NULL;
|
||||||
|
- return net->nf.hooks_arp + reg->hooknum;
|
||||||
|
+ return net->nf.hooks_arp + hooknum;
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
|
||||||
|
case NFPROTO_BRIDGE:
|
||||||
|
- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= reg->hooknum))
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
|
||||||
|
return NULL;
|
||||||
|
- return net->nf.hooks_bridge + reg->hooknum;
|
||||||
|
+ return net->nf.hooks_bridge + hooknum;
|
||||||
|
#endif
|
||||||
|
case NFPROTO_IPV4:
|
||||||
|
- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= reg->hooknum))
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
|
||||||
|
return NULL;
|
||||||
|
- return net->nf.hooks_ipv4 + reg->hooknum;
|
||||||
|
+ return net->nf.hooks_ipv4 + hooknum;
|
||||||
|
case NFPROTO_IPV6:
|
||||||
|
- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= reg->hooknum))
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
|
||||||
|
return NULL;
|
||||||
|
- return net->nf.hooks_ipv6 + reg->hooknum;
|
||||||
|
+ return net->nf.hooks_ipv6 + hooknum;
|
||||||
|
#if IS_ENABLED(CONFIG_DECNET)
|
||||||
|
case NFPROTO_DECNET:
|
||||||
|
- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= reg->hooknum))
|
||||||
|
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
|
||||||
|
return NULL;
|
||||||
|
- return net->nf.hooks_decnet + reg->hooknum;
|
||||||
|
+ return net->nf.hooks_decnet + hooknum;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
WARN_ON_ONCE(1);
|
||||||
|
@@ -299,9 +301,9 @@ static struct nf_hook_entries __rcu **nf
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NETFILTER_INGRESS
|
||||||
|
- if (reg->hooknum == NF_NETDEV_INGRESS) {
|
||||||
|
- if (reg->dev && dev_net(reg->dev) == net)
|
||||||
|
- return &reg->dev->nf_hooks_ingress;
|
||||||
|
+ if (hooknum == NF_NETDEV_INGRESS) {
|
||||||
|
+ if (dev && dev_net(dev) == net)
|
||||||
|
+ return &dev->nf_hooks_ingress;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
WARN_ON_ONCE(1);
|
||||||
|
@@ -323,7 +325,7 @@ int nf_register_net_hook(struct net *net
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
- pp = nf_hook_entry_head(net, reg);
|
||||||
|
+ pp = nf_hook_entry_head(net, reg->pf, reg->hooknum, reg->dev);
|
||||||
|
if (!pp)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
@@ -397,7 +399,7 @@ void nf_unregister_net_hook(struct net *
|
||||||
|
struct nf_hook_entries __rcu **pp;
|
||||||
|
struct nf_hook_entries *p;
|
||||||
|
|
||||||
|
- pp = nf_hook_entry_head(net, reg);
|
||||||
|
+ pp = nf_hook_entry_head(net, reg->pf, reg->hooknum, reg->dev);
|
||||||
|
if (!pp)
|
||||||
|
return;
|
||||||
|
|
|
@@ -0,0 +1,44 @@
|
||||||
|
From 3d3cdc38e8c265a9f9d3825e823e772872bca1b8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sat, 9 Dec 2017 15:19:14 +0100
|
||||||
|
Subject: [PATCH 01/11] netfilter: core: add nf_remove_net_hook
|
||||||
|
|
||||||
|
Just a cleanup, __nf_unregister_net_hook() is used by a follow up patch
|
||||||
|
when handling NFPROTO_INET as a real family from the core.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
net/netfilter/core.c | 8 ++++----
|
||||||
|
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -356,7 +356,7 @@ int nf_register_net_hook(struct net *net
|
||||||
|
EXPORT_SYMBOL(nf_register_net_hook);
|
||||||
|
|
||||||
|
/*
|
||||||
|
- * __nf_unregister_net_hook - remove a hook from blob
|
||||||
|
+ * nf_remove_net_hook - remove a hook from blob
|
||||||
|
*
|
||||||
|
* @oldp: current address of hook blob
|
||||||
|
* @unreg: hook to unregister
|
||||||
|
@@ -364,8 +364,8 @@ EXPORT_SYMBOL(nf_register_net_hook);
|
||||||
|
* This cannot fail, hook unregistration must always succeed.
|
||||||
|
* Therefore replace the to-be-removed hook with a dummy hook.
|
||||||
|
*/
|
||||||
|
-static void __nf_unregister_net_hook(struct nf_hook_entries *old,
|
||||||
|
- const struct nf_hook_ops *unreg)
|
||||||
|
+static void nf_remove_net_hook(struct nf_hook_entries *old,
|
||||||
|
+ const struct nf_hook_ops *unreg)
|
||||||
|
{
|
||||||
|
struct nf_hook_ops **orig_ops;
|
||||||
|
bool found = false;
|
||||||
|
@@ -411,7 +411,7 @@ void nf_unregister_net_hook(struct net *
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- __nf_unregister_net_hook(p, reg);
|
||||||
|
+ nf_remove_net_hook(p, reg);
|
||||||
|
|
||||||
|
p = __nf_hook_entries_try_shrink(pp);
|
||||||
|
mutex_unlock(&nf_hook_mutex);
|
|
@@ -0,0 +1,51 @@
|
||||||
|
From 30259408118f550f5969fda19c0d67020d21eda8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sat, 9 Dec 2017 15:26:37 +0100
|
||||||
|
Subject: [PATCH 10/11] netfilter: core: pass family as parameter to
|
||||||
|
nf_remove_net_hook()
|
||||||
|
|
||||||
|
So static_key_slow_dec applies to the family behind NFPROTO_INET.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
net/netfilter/core.c | 10 +++++-----
|
||||||
|
1 file changed, 5 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -365,7 +365,7 @@ EXPORT_SYMBOL(nf_register_net_hook);
|
||||||
|
* Therefore replace the to-be-removed hook with a dummy hook.
|
||||||
|
*/
|
||||||
|
static void nf_remove_net_hook(struct nf_hook_entries *old,
|
||||||
|
- const struct nf_hook_ops *unreg)
|
||||||
|
+ const struct nf_hook_ops *unreg, int pf)
|
||||||
|
{
|
||||||
|
struct nf_hook_ops **orig_ops;
|
||||||
|
bool found = false;
|
||||||
|
@@ -383,14 +383,14 @@ static void nf_remove_net_hook(struct nf
|
||||||
|
|
||||||
|
if (found) {
|
||||||
|
#ifdef CONFIG_NETFILTER_INGRESS
|
||||||
|
- if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
|
||||||
|
+ if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
|
||||||
|
net_dec_ingress_queue();
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_JUMP_LABEL
|
||||||
|
- static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
|
||||||
|
+ static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
- WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
|
||||||
|
+ WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -411,7 +411,7 @@ void nf_unregister_net_hook(struct net *
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- nf_remove_net_hook(p, reg);
|
||||||
|
+ nf_remove_net_hook(p, reg, reg->pf);
|
||||||
|
|
||||||
|
p = __nf_hook_entries_try_shrink(pp);
|
||||||
|
mutex_unlock(&nf_hook_mutex);
|
|
@@ -0,0 +1,129 @@
|
||||||
|
From cb7ccd835ebb333669e400f99c650e4f3abf11c0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sat, 9 Dec 2017 15:30:26 +0100
|
||||||
|
Subject: [PATCH 11/11] netfilter: core: support for NFPROTO_INET hook
|
||||||
|
registration
|
||||||
|
|
||||||
|
Expand NFPROTO_INET in two hook registrations, one for NFPROTO_IPV4 and
|
||||||
|
another for NFPROTO_IPV6. Hence, we handle NFPROTO_INET from the core.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
net/netfilter/core.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------
|
||||||
|
1 file changed, 44 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -310,12 +310,13 @@ nf_hook_entry_head(struct net *net, int
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
|
||||||
|
+static int __nf_register_net_hook(struct net *net, int pf,
|
||||||
|
+ const struct nf_hook_ops *reg)
|
||||||
|
{
|
||||||
|
struct nf_hook_entries *p, *new_hooks;
|
||||||
|
struct nf_hook_entries __rcu **pp;
|
||||||
|
|
||||||
|
- if (reg->pf == NFPROTO_NETDEV) {
|
||||||
|
+ if (pf == NFPROTO_NETDEV) {
|
||||||
|
#ifndef CONFIG_NETFILTER_INGRESS
|
||||||
|
if (reg->hooknum == NF_NETDEV_INGRESS)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
@@ -325,7 +326,7 @@ int nf_register_net_hook(struct net *net
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
- pp = nf_hook_entry_head(net, reg->pf, reg->hooknum, reg->dev);
|
||||||
|
+ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
|
||||||
|
if (!pp)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
@@ -343,17 +344,16 @@ int nf_register_net_hook(struct net *net
|
||||||
|
|
||||||
|
hooks_validate(new_hooks);
|
||||||
|
#ifdef CONFIG_NETFILTER_INGRESS
|
||||||
|
- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
|
||||||
|
+ if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
|
||||||
|
net_inc_ingress_queue();
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_JUMP_LABEL
|
||||||
|
- static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
|
||||||
|
+ static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
|
||||||
|
#endif
|
||||||
|
BUG_ON(p == new_hooks);
|
||||||
|
nf_hook_entries_free(p);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
-EXPORT_SYMBOL(nf_register_net_hook);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* nf_remove_net_hook - remove a hook from blob
|
||||||
|
@@ -394,12 +394,13 @@ static void nf_remove_net_hook(struct nf
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
|
||||||
|
+void __nf_unregister_net_hook(struct net *net, int pf,
|
||||||
|
+ const struct nf_hook_ops *reg)
|
||||||
|
{
|
||||||
|
struct nf_hook_entries __rcu **pp;
|
||||||
|
struct nf_hook_entries *p;
|
||||||
|
|
||||||
|
- pp = nf_hook_entry_head(net, reg->pf, reg->hooknum, reg->dev);
|
||||||
|
+ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
|
||||||
|
if (!pp)
|
||||||
|
return;
|
||||||
|
|
||||||
|
@@ -411,7 +412,7 @@ void nf_unregister_net_hook(struct net *
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- nf_remove_net_hook(p, reg, reg->pf);
|
||||||
|
+ nf_remove_net_hook(p, reg, pf);
|
||||||
|
|
||||||
|
p = __nf_hook_entries_try_shrink(pp);
|
||||||
|
mutex_unlock(&nf_hook_mutex);
|
||||||
|
@@ -421,8 +422,42 @@ void nf_unregister_net_hook(struct net *
|
||||||
|
nf_queue_nf_hook_drop(net);
|
||||||
|
nf_hook_entries_free(p);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
|
||||||
|
+{
|
||||||
|
+ if (reg->pf == NFPROTO_INET) {
|
||||||
|
+ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
|
||||||
|
+ __nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
|
||||||
|
+ } else {
|
||||||
|
+ __nf_unregister_net_hook(net, reg->pf, reg);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
EXPORT_SYMBOL(nf_unregister_net_hook);
|
||||||
|
|
||||||
|
+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
|
||||||
|
+{
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ if (reg->pf == NFPROTO_INET) {
|
||||||
|
+ err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
|
||||||
|
+ if (err < 0)
|
||||||
|
+ return err;
|
||||||
|
+
|
||||||
|
+ err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
|
||||||
|
+ if (err < 0) {
|
||||||
|
+ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
|
||||||
|
+ return err;
|
||||||
|
+ }
|
||||||
|
+ } else {
|
||||||
|
+ err = __nf_register_net_hook(net, reg->pf, reg);
|
||||||
|
+ if (err < 0)
|
||||||
|
+ return err;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL(nf_register_net_hook);
|
||||||
|
+
|
||||||
|
int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
|
||||||
|
unsigned int n)
|
||||||
|
{
|
|
@ -0,0 +1,291 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sun, 10 Dec 2017 01:43:14 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: explicit nft_set_pktinfo() call from
|
||||||
|
hook path
|
||||||
|
|
||||||
|
Instead of calling this function from the family specific variant, this
|
||||||
|
reduces the code size in the fast path for the netdev, bridge and inet
|
||||||
|
families. After this change, we must call nft_set_pktinfo() upfront from
|
||||||
|
the chain hook indirection.
|
||||||
|
|
||||||
|
Before:
|
||||||
|
|
||||||
|
text data bss dec hex filename
|
||||||
|
2145 208 0 2353 931 net/netfilter/nf_tables_netdev.o
|
||||||
|
|
||||||
|
After:
|
||||||
|
|
||||||
|
text data bss dec hex filename
|
||||||
|
2125 208 0 2333 91d net/netfilter/nf_tables_netdev.o
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_tables.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables.h
|
||||||
|
@@ -54,8 +54,8 @@ static inline void nft_set_pktinfo(struc
|
||||||
|
pkt->xt.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
|
||||||
|
- struct sk_buff *skb)
|
||||||
|
+static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
|
||||||
|
+ struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
pkt->tprot_set = false;
|
||||||
|
pkt->tprot = 0;
|
||||||
|
@@ -63,14 +63,6 @@ static inline void nft_set_pktinfo_proto
|
||||||
|
pkt->xt.fragoff = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
-{
|
||||||
|
- nft_set_pktinfo(pkt, skb, state);
|
||||||
|
- nft_set_pktinfo_proto_unspec(pkt, skb);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
/**
|
||||||
|
* struct nft_verdict - nf_tables verdict
|
||||||
|
*
|
||||||
|
--- a/include/net/netfilter/nf_tables_ipv4.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables_ipv4.h
|
||||||
|
@@ -5,15 +5,11 @@
|
||||||
|
#include <net/netfilter/nf_tables.h>
|
||||||
|
#include <net/ip.h>
|
||||||
|
|
||||||
|
-static inline void
|
||||||
|
-nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
+static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
|
||||||
|
+ struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct iphdr *ip;
|
||||||
|
|
||||||
|
- nft_set_pktinfo(pkt, skb, state);
|
||||||
|
-
|
||||||
|
ip = ip_hdr(pkt->skb);
|
||||||
|
pkt->tprot_set = true;
|
||||||
|
pkt->tprot = ip->protocol;
|
||||||
|
@@ -21,10 +17,8 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo
|
||||||
|
pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline int
|
||||||
|
-__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
+static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
|
||||||
|
+ struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct iphdr *iph, _iph;
|
||||||
|
u32 len, thoff;
|
||||||
|
@@ -52,14 +46,11 @@ __nft_set_pktinfo_ipv4_validate(struct n
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline void
|
||||||
|
-nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
+static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
|
||||||
|
+ struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
- nft_set_pktinfo(pkt, skb, state);
|
||||||
|
- if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0)
|
||||||
|
- nft_set_pktinfo_proto_unspec(pkt, skb);
|
||||||
|
+ if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0)
|
||||||
|
+ nft_set_pktinfo_unspec(pkt, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern struct nft_af_info nft_af_ipv4;
|
||||||
|
--- a/include/net/netfilter/nf_tables_ipv6.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables_ipv6.h
|
||||||
|
@@ -5,20 +5,16 @@
|
||||||
|
#include <linux/netfilter_ipv6/ip6_tables.h>
|
||||||
|
#include <net/ipv6.h>
|
||||||
|
|
||||||
|
-static inline void
|
||||||
|
-nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
+static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
|
||||||
|
+ struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
unsigned int flags = IP6_FH_F_AUTH;
|
||||||
|
int protohdr, thoff = 0;
|
||||||
|
unsigned short frag_off;
|
||||||
|
|
||||||
|
- nft_set_pktinfo(pkt, skb, state);
|
||||||
|
-
|
||||||
|
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
|
||||||
|
if (protohdr < 0) {
|
||||||
|
- nft_set_pktinfo_proto_unspec(pkt, skb);
|
||||||
|
+ nft_set_pktinfo_unspec(pkt, skb);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -28,10 +24,8 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo
|
||||||
|
pkt->xt.fragoff = frag_off;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline int
|
||||||
|
-__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
+static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
|
||||||
|
+ struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
|
unsigned int flags = IP6_FH_F_AUTH;
|
||||||
|
@@ -68,14 +62,11 @@ __nft_set_pktinfo_ipv6_validate(struct n
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline void
|
||||||
|
-nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
+static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
|
||||||
|
+ struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
- nft_set_pktinfo(pkt, skb, state);
|
||||||
|
- if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0)
|
||||||
|
- nft_set_pktinfo_proto_unspec(pkt, skb);
|
||||||
|
+ if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0)
|
||||||
|
+ nft_set_pktinfo_unspec(pkt, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern struct nft_af_info nft_af_ipv6;
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -25,15 +25,17 @@ nft_do_chain_bridge(void *priv,
|
||||||
|
{
|
||||||
|
struct nft_pktinfo pkt;
|
||||||
|
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+
|
||||||
|
switch (eth_hdr(skb)->h_proto) {
|
||||||
|
case htons(ETH_P_IP):
|
||||||
|
- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
|
||||||
|
break;
|
||||||
|
case htons(ETH_P_IPV6):
|
||||||
|
- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
- nft_set_pktinfo_unspec(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_unspec(&pkt, skb);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
@@ -21,7 +21,8 @@ nft_do_chain_arp(void *priv,
|
||||||
|
{
|
||||||
|
struct nft_pktinfo pkt;
|
||||||
|
|
||||||
|
- nft_set_pktinfo_unspec(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_unspec(&pkt, skb);
|
||||||
|
|
||||||
|
return nft_do_chain(&pkt, priv);
|
||||||
|
}
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
@@ -24,7 +24,8 @@ static unsigned int nft_do_chain_ipv4(vo
|
||||||
|
{
|
||||||
|
struct nft_pktinfo pkt;
|
||||||
|
|
||||||
|
- nft_set_pktinfo_ipv4(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv4(&pkt, skb);
|
||||||
|
|
||||||
|
return nft_do_chain(&pkt, priv);
|
||||||
|
}
|
||||||
|
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
|
||||||
|
@@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(voi
|
||||||
|
{
|
||||||
|
struct nft_pktinfo pkt;
|
||||||
|
|
||||||
|
- nft_set_pktinfo_ipv4(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv4(&pkt, skb);
|
||||||
|
|
||||||
|
return nft_do_chain(&pkt, priv);
|
||||||
|
}
|
||||||
|
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
|
||||||
|
@@ -38,7 +38,8 @@ static unsigned int nf_route_table_hook(
|
||||||
|
ip_hdrlen(skb) < sizeof(struct iphdr))
|
||||||
|
return NF_ACCEPT;
|
||||||
|
|
||||||
|
- nft_set_pktinfo_ipv4(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv4(&pkt, skb);
|
||||||
|
|
||||||
|
mark = skb->mark;
|
||||||
|
iph = ip_hdr(skb);
|
||||||
|
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
@@ -22,7 +22,8 @@ static unsigned int nft_do_chain_ipv6(vo
|
||||||
|
{
|
||||||
|
struct nft_pktinfo pkt;
|
||||||
|
|
||||||
|
- nft_set_pktinfo_ipv6(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv6(&pkt, skb);
|
||||||
|
|
||||||
|
return nft_do_chain(&pkt, priv);
|
||||||
|
}
|
||||||
|
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
|
||||||
|
@@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(voi
|
||||||
|
{
|
||||||
|
struct nft_pktinfo pkt;
|
||||||
|
|
||||||
|
- nft_set_pktinfo_ipv6(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv6(&pkt, skb);
|
||||||
|
|
||||||
|
return nft_do_chain(&pkt, priv);
|
||||||
|
}
|
||||||
|
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
|
||||||
|
@@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(
|
||||||
|
u32 mark, flowlabel;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
- nft_set_pktinfo_ipv6(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv6(&pkt, skb);
|
||||||
|
|
||||||
|
/* save source/dest address, mark, hoplimit, flowlabel, priority */
|
||||||
|
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
|
||||||
|
--- a/net/netfilter/nf_tables_netdev.c
|
||||||
|
+++ b/net/netfilter/nf_tables_netdev.c
|
||||||
|
@@ -21,15 +21,17 @@ nft_do_chain_netdev(void *priv, struct s
|
||||||
|
{
|
||||||
|
struct nft_pktinfo pkt;
|
||||||
|
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+
|
||||||
|
switch (skb->protocol) {
|
||||||
|
case htons(ETH_P_IP):
|
||||||
|
- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
|
||||||
|
break;
|
||||||
|
case htons(ETH_P_IPV6):
|
||||||
|
- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
- nft_set_pktinfo_unspec(&pkt, skb, state);
|
||||||
|
+ nft_set_pktinfo_unspec(&pkt, skb);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,146 @@
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Fri, 8 Dec 2017 17:01:54 +0100
|
||||||
|
Subject: [PATCH] netfilter: core: only allow one nat hook per hook point
|
||||||
|
|
||||||
|
The netfilter NAT core cannot deal with more than one NAT hook per hook
|
||||||
|
location (prerouting, input ...), because the NAT hooks install a NAT null
|
||||||
|
binding in case the iptables nat table (iptable_nat hooks) or the
|
||||||
|
corresponding nftables chain (nft nat hooks) doesn't specify a nat
|
||||||
|
transformation.
|
||||||
|
|
||||||
|
Null bindings are needed to detect port collisions between NAT-ed and
|
||||||
|
non-NAT-ed connections.
|
||||||
|
|
||||||
|
This causes nftables NAT rules to not work when iptable_nat module is
|
||||||
|
loaded, and vice versa because nat binding has already been attached
|
||||||
|
when the second nat hook is consulted.
|
||||||
|
|
||||||
|
The netfilter core is not really the correct location to handle this
|
||||||
|
(hooks are just hooks, the core has no notion of what kinds of side
|
||||||
|
effects a hook implements), but it's the only place where we can check
|
||||||
|
for conflicts between both iptables hooks and nftables hooks without
|
||||||
|
adding dependencies.
|
||||||
|
|
||||||
|
So add nat annotation to hook_ops to describe those hooks that will
|
||||||
|
add NAT bindings and then make core reject if such a hook already exists.
|
||||||
|
The annotation fills a padding hole, in case further restrictions appear
|
||||||
|
we might change this to a 'u8 type' instead of bool.
|
||||||
|
|
||||||
|
iptables error if nft nat hook active:
|
||||||
|
iptables -t nat -A POSTROUTING -j MASQUERADE
|
||||||
|
iptables v1.4.21: can't initialize iptables table `nat': File exists
|
||||||
|
Perhaps iptables or your kernel needs to be upgraded.
|
||||||
|
|
||||||
|
nftables error if iptables nat table present:
|
||||||
|
nft -f /etc/nftables/ipv4-nat
|
||||||
|
/usr/etc/nftables/ipv4-nat:3:1-2: Error: Could not process rule: File exists
|
||||||
|
table nat {
|
||||||
|
^^
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -67,6 +67,7 @@ struct nf_hook_ops {
|
||||||
|
struct net_device *dev;
|
||||||
|
void *priv;
|
||||||
|
u_int8_t pf;
|
||||||
|
+ bool nat_hook;
|
||||||
|
unsigned int hooknum;
|
||||||
|
/* Hooks are ordered in ascending priority. */
|
||||||
|
int priority;
|
||||||
|
--- a/net/ipv4/netfilter/iptable_nat.c
|
||||||
|
+++ b/net/ipv4/netfilter/iptable_nat.c
|
||||||
|
@@ -72,6 +72,7 @@ static const struct nf_hook_ops nf_nat_i
|
||||||
|
{
|
||||||
|
.hook = iptable_nat_ipv4_in,
|
||||||
|
.pf = NFPROTO_IPV4,
|
||||||
|
+ .nat_hook = true,
|
||||||
|
.hooknum = NF_INET_PRE_ROUTING,
|
||||||
|
.priority = NF_IP_PRI_NAT_DST,
|
||||||
|
},
|
||||||
|
@@ -79,6 +80,7 @@ static const struct nf_hook_ops nf_nat_i
|
||||||
|
{
|
||||||
|
.hook = iptable_nat_ipv4_out,
|
||||||
|
.pf = NFPROTO_IPV4,
|
||||||
|
+ .nat_hook = true,
|
||||||
|
.hooknum = NF_INET_POST_ROUTING,
|
||||||
|
.priority = NF_IP_PRI_NAT_SRC,
|
||||||
|
},
|
||||||
|
@@ -86,6 +88,7 @@ static const struct nf_hook_ops nf_nat_i
|
||||||
|
{
|
||||||
|
.hook = iptable_nat_ipv4_local_fn,
|
||||||
|
.pf = NFPROTO_IPV4,
|
||||||
|
+ .nat_hook = true,
|
||||||
|
.hooknum = NF_INET_LOCAL_OUT,
|
||||||
|
.priority = NF_IP_PRI_NAT_DST,
|
||||||
|
},
|
||||||
|
@@ -93,6 +96,7 @@ static const struct nf_hook_ops nf_nat_i
|
||||||
|
{
|
||||||
|
.hook = iptable_nat_ipv4_fn,
|
||||||
|
.pf = NFPROTO_IPV4,
|
||||||
|
+ .nat_hook = true,
|
||||||
|
.hooknum = NF_INET_LOCAL_IN,
|
||||||
|
.priority = NF_IP_PRI_NAT_SRC,
|
||||||
|
},
|
||||||
|
--- a/net/ipv6/netfilter/ip6table_nat.c
|
||||||
|
+++ b/net/ipv6/netfilter/ip6table_nat.c
|
||||||
|
@@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_i
|
||||||
|
{
|
||||||
|
.hook = ip6table_nat_in,
|
||||||
|
.pf = NFPROTO_IPV6,
|
||||||
|
+ .nat_hook = true,
|
||||||
|
.hooknum = NF_INET_PRE_ROUTING,
|
||||||
|
.priority = NF_IP6_PRI_NAT_DST,
|
||||||
|
},
|
||||||
|
@@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_i
|
||||||
|
{
|
||||||
|
.hook = ip6table_nat_out,
|
||||||
|
.pf = NFPROTO_IPV6,
|
||||||
|
+ .nat_hook = true,
|
||||||
|
.hooknum = NF_INET_POST_ROUTING,
|
||||||
|
.priority = NF_IP6_PRI_NAT_SRC,
|
||||||
|
},
|
||||||
|
@@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_i
|
||||||
|
{
|
||||||
|
.hook = ip6table_nat_local_fn,
|
||||||
|
.pf = NFPROTO_IPV6,
|
||||||
|
+ .nat_hook = true,
|
||||||
|
.hooknum = NF_INET_LOCAL_OUT,
|
||||||
|
.priority = NF_IP6_PRI_NAT_DST,
|
||||||
|
},
|
||||||
|
/* After packet filtering, change source */
|
||||||
|
{
|
||||||
|
.hook = ip6table_nat_fn,
|
||||||
|
+ .nat_hook = true,
|
||||||
|
.pf = NFPROTO_IPV6,
|
||||||
|
.hooknum = NF_INET_LOCAL_IN,
|
||||||
|
.priority = NF_IP6_PRI_NAT_SRC,
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -160,6 +160,12 @@ nf_hook_entries_grow(const struct nf_hoo
|
||||||
|
++i;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ if (reg->nat_hook && orig_ops[i]->nat_hook) {
|
||||||
|
+ kvfree(new);
|
||||||
|
+ return ERR_PTR(-EEXIST);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (inserted || reg->priority > orig_ops[i]->priority) {
|
||||||
|
new_ops[nhooks] = (void *)orig_ops[i];
|
||||||
|
new->hooks[nhooks] = old->hooks[i];
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -1431,6 +1431,8 @@ static int nf_tables_addchain(struct nft
|
||||||
|
ops->hook = hookfn;
|
||||||
|
if (afi->hook_ops_init)
|
||||||
|
afi->hook_ops_init(ops, i);
|
||||||
|
+ if (basechain->type->type == NFT_CHAIN_T_NAT)
|
||||||
|
+ ops->nat_hook = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
chain->flags |= NFT_BASE_CHAIN;
|
|
@ -0,0 +1,161 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sat, 9 Dec 2017 15:36:24 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables_inet: don't use multihook infrastructure
|
||||||
|
anymore
|
||||||
|
|
||||||
|
Use new native NFPROTO_INET support in netfilter core, this gets rid of
|
||||||
|
ad-hoc code in the nf_tables API codebase.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_tables_ipv4.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables_ipv4.h
|
||||||
|
@@ -53,6 +53,4 @@ static inline void nft_set_pktinfo_ipv4_
|
||||||
|
nft_set_pktinfo_unspec(pkt, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
-extern struct nft_af_info nft_af_ipv4;
|
||||||
|
-
|
||||||
|
#endif
|
||||||
|
--- a/include/net/netfilter/nf_tables_ipv6.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables_ipv6.h
|
||||||
|
@@ -69,6 +69,4 @@ static inline void nft_set_pktinfo_ipv6_
|
||||||
|
nft_set_pktinfo_unspec(pkt, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
-extern struct nft_af_info nft_af_ipv6;
|
||||||
|
-
|
||||||
|
#endif
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
@@ -45,7 +45,7 @@ static unsigned int nft_ipv4_output(void
|
||||||
|
return nft_do_chain_ipv4(priv, skb, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
-struct nft_af_info nft_af_ipv4 __read_mostly = {
|
||||||
|
+static struct nft_af_info nft_af_ipv4 __read_mostly = {
|
||||||
|
.family = NFPROTO_IPV4,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
@@ -58,7 +58,6 @@ struct nft_af_info nft_af_ipv4 __read_mo
|
||||||
|
[NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
-EXPORT_SYMBOL_GPL(nft_af_ipv4);
|
||||||
|
|
||||||
|
static int nf_tables_ipv4_init_net(struct net *net)
|
||||||
|
{
|
||||||
|
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
@@ -42,7 +42,7 @@ static unsigned int nft_ipv6_output(void
|
||||||
|
return nft_do_chain_ipv6(priv, skb, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
-struct nft_af_info nft_af_ipv6 __read_mostly = {
|
||||||
|
+static struct nft_af_info nft_af_ipv6 __read_mostly = {
|
||||||
|
.family = NFPROTO_IPV6,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
@@ -55,7 +55,6 @@ struct nft_af_info nft_af_ipv6 __read_mo
|
||||||
|
[NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
-EXPORT_SYMBOL_GPL(nft_af_ipv6);
|
||||||
|
|
||||||
|
static int nf_tables_ipv6_init_net(struct net *net)
|
||||||
|
{
|
||||||
|
--- a/net/netfilter/nf_tables_inet.c
|
||||||
|
+++ b/net/netfilter/nf_tables_inet.c
|
||||||
|
@@ -9,6 +9,7 @@
|
||||||
|
#include <linux/init.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/ip.h>
|
||||||
|
+#include <linux/ipv6.h>
|
||||||
|
#include <linux/netfilter_ipv4.h>
|
||||||
|
#include <linux/netfilter_ipv6.h>
|
||||||
|
#include <net/netfilter/nf_tables.h>
|
||||||
|
@@ -16,26 +17,71 @@
|
||||||
|
#include <net/netfilter/nf_tables_ipv6.h>
|
||||||
|
#include <net/ip.h>
|
||||||
|
|
||||||
|
-static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n)
|
||||||
|
+static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
{
|
||||||
|
- struct nft_af_info *afi;
|
||||||
|
+ struct nft_pktinfo pkt;
|
||||||
|
|
||||||
|
- if (n == 1)
|
||||||
|
- afi = &nft_af_ipv4;
|
||||||
|
- else
|
||||||
|
- afi = &nft_af_ipv6;
|
||||||
|
-
|
||||||
|
- ops->pf = afi->family;
|
||||||
|
- if (afi->hooks[ops->hooknum])
|
||||||
|
- ops->hook = afi->hooks[ops->hooknum];
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+
|
||||||
|
+ switch (state->pf) {
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ nft_set_pktinfo_ipv4(&pkt, skb);
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ nft_set_pktinfo_ipv6(&pkt, skb);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nft_do_chain(&pkt, priv);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nft_inet_output(void *priv, struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ struct nft_pktinfo pkt;
|
||||||
|
+
|
||||||
|
+ nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
+
|
||||||
|
+ switch (state->pf) {
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ if (unlikely(skb->len < sizeof(struct iphdr) ||
|
||||||
|
+ ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
|
||||||
|
+ if (net_ratelimit())
|
||||||
|
+ pr_info("ignoring short SOCK_RAW packet\n");
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ }
|
||||||
|
+ nft_set_pktinfo_ipv4(&pkt, skb);
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
|
||||||
|
+ if (net_ratelimit())
|
||||||
|
+ pr_info("ignoring short SOCK_RAW packet\n");
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ }
|
||||||
|
+ nft_set_pktinfo_ipv6(&pkt, skb);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nft_do_chain(&pkt, priv);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct nft_af_info nft_af_inet __read_mostly = {
|
||||||
|
.family = NFPROTO_INET,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .nops = 2,
|
||||||
|
- .hook_ops_init = nft_inet_hook_ops_init,
|
||||||
|
+ .nops = 1,
|
||||||
|
+ .hooks = {
|
||||||
|
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
|
||||||
|
+ [NF_INET_LOCAL_OUT] = nft_inet_output,
|
||||||
|
+ [NF_INET_FORWARD] = nft_do_chain_inet,
|
||||||
|
+ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
|
||||||
|
+ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __net_init nf_tables_inet_init_net(struct net *net)
|
|
@ -0,0 +1,390 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sat, 9 Dec 2017 15:40:25 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: remove multihook chains and families
|
||||||
|
|
||||||
|
Since NFPROTO_INET is handled from the core, we don't need to maintain
|
||||||
|
extra infrastructure in nf_tables to handle the double hook
|
||||||
|
registration, one for IPv4 and another for IPv6.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_tables.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables.h
|
||||||
|
@@ -897,8 +897,6 @@ struct nft_stats {
|
||||||
|
struct u64_stats_sync syncp;
|
||||||
|
};
|
||||||
|
|
||||||
|
-#define NFT_HOOK_OPS_MAX 2
|
||||||
|
-
|
||||||
|
/**
|
||||||
|
* struct nft_base_chain - nf_tables base chain
|
||||||
|
*
|
||||||
|
@@ -910,7 +908,7 @@ struct nft_stats {
|
||||||
|
* @dev_name: device name that this base chain is attached to (if any)
|
||||||
|
*/
|
||||||
|
struct nft_base_chain {
|
||||||
|
- struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
|
||||||
|
+ struct nf_hook_ops ops;
|
||||||
|
const struct nf_chain_type *type;
|
||||||
|
u8 policy;
|
||||||
|
u8 flags;
|
||||||
|
@@ -971,8 +969,6 @@ enum nft_af_flags {
|
||||||
|
* @owner: module owner
|
||||||
|
* @tables: used internally
|
||||||
|
* @flags: family flags
|
||||||
|
- * @nops: number of hook ops in this family
|
||||||
|
- * @hook_ops_init: initialization function for chain hook ops
|
||||||
|
* @hooks: hookfn overrides for packet validation
|
||||||
|
*/
|
||||||
|
struct nft_af_info {
|
||||||
|
@@ -982,9 +978,6 @@ struct nft_af_info {
|
||||||
|
struct module *owner;
|
||||||
|
struct list_head tables;
|
||||||
|
u32 flags;
|
||||||
|
- unsigned int nops;
|
||||||
|
- void (*hook_ops_init)(struct nf_hook_ops *,
|
||||||
|
- unsigned int);
|
||||||
|
nf_hookfn *hooks[NF_MAX_HOOKS];
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -46,7 +46,6 @@ static struct nft_af_info nft_af_bridge
|
||||||
|
.family = NFPROTO_BRIDGE,
|
||||||
|
.nhooks = NF_BR_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .nops = 1,
|
||||||
|
.hooks = {
|
||||||
|
[NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
|
||||||
|
[NF_BR_LOCAL_IN] = nft_do_chain_bridge,
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
@@ -31,7 +31,6 @@ static struct nft_af_info nft_af_arp __r
|
||||||
|
.family = NFPROTO_ARP,
|
||||||
|
.nhooks = NF_ARP_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .nops = 1,
|
||||||
|
.hooks = {
|
||||||
|
[NF_ARP_IN] = nft_do_chain_arp,
|
||||||
|
[NF_ARP_OUT] = nft_do_chain_arp,
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
@@ -49,7 +49,6 @@ static struct nft_af_info nft_af_ipv4 __
|
||||||
|
.family = NFPROTO_IPV4,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .nops = 1,
|
||||||
|
.hooks = {
|
||||||
|
[NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
|
||||||
|
[NF_INET_LOCAL_OUT] = nft_ipv4_output,
|
||||||
|
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
@@ -46,7 +46,6 @@ static struct nft_af_info nft_af_ipv6 __
|
||||||
|
.family = NFPROTO_IPV6,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .nops = 1,
|
||||||
|
.hooks = {
|
||||||
|
[NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
|
||||||
|
[NF_INET_LOCAL_OUT] = nft_ipv6_output,
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -139,29 +139,26 @@ static void nft_trans_destroy(struct nft
|
||||||
|
kfree(trans);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int nf_tables_register_hooks(struct net *net,
|
||||||
|
- const struct nft_table *table,
|
||||||
|
- struct nft_chain *chain,
|
||||||
|
- unsigned int hook_nops)
|
||||||
|
+static int nf_tables_register_hook(struct net *net,
|
||||||
|
+ const struct nft_table *table,
|
||||||
|
+ struct nft_chain *chain)
|
||||||
|
{
|
||||||
|
if (table->flags & NFT_TABLE_F_DORMANT ||
|
||||||
|
!nft_is_base_chain(chain))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
- return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
|
||||||
|
- hook_nops);
|
||||||
|
+ return nf_register_net_hook(net, &nft_base_chain(chain)->ops);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void nf_tables_unregister_hooks(struct net *net,
|
||||||
|
- const struct nft_table *table,
|
||||||
|
- struct nft_chain *chain,
|
||||||
|
- unsigned int hook_nops)
|
||||||
|
+static void nf_tables_unregister_hook(struct net *net,
|
||||||
|
+ const struct nft_table *table,
|
||||||
|
+ struct nft_chain *chain)
|
||||||
|
{
|
||||||
|
if (table->flags & NFT_TABLE_F_DORMANT ||
|
||||||
|
!nft_is_base_chain(chain))
|
||||||
|
return;
|
||||||
|
|
||||||
|
- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
|
||||||
|
+ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
|
||||||
|
@@ -624,8 +621,7 @@ static void _nf_tables_table_disable(str
|
||||||
|
if (cnt && i++ == cnt)
|
||||||
|
break;
|
||||||
|
|
||||||
|
- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
|
||||||
|
- afi->nops);
|
||||||
|
+ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -642,8 +638,7 @@ static int nf_tables_table_enable(struct
|
||||||
|
if (!nft_is_base_chain(chain))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
- err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
|
||||||
|
- afi->nops);
|
||||||
|
+ err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
|
||||||
|
if (err < 0)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
@@ -1055,7 +1050,7 @@ static int nf_tables_fill_chain_info(str
|
||||||
|
|
||||||
|
if (nft_is_base_chain(chain)) {
|
||||||
|
const struct nft_base_chain *basechain = nft_base_chain(chain);
|
||||||
|
- const struct nf_hook_ops *ops = &basechain->ops[0];
|
||||||
|
+ const struct nf_hook_ops *ops = &basechain->ops;
|
||||||
|
struct nlattr *nest;
|
||||||
|
|
||||||
|
nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
|
||||||
|
@@ -1283,8 +1278,8 @@ static void nf_tables_chain_destroy(stru
|
||||||
|
free_percpu(basechain->stats);
|
||||||
|
if (basechain->stats)
|
||||||
|
static_branch_dec(&nft_counters_enabled);
|
||||||
|
- if (basechain->ops[0].dev != NULL)
|
||||||
|
- dev_put(basechain->ops[0].dev);
|
||||||
|
+ if (basechain->ops.dev != NULL)
|
||||||
|
+ dev_put(basechain->ops.dev);
|
||||||
|
kfree(chain->name);
|
||||||
|
kfree(basechain);
|
||||||
|
} else {
|
||||||
|
@@ -1380,7 +1375,6 @@ static int nf_tables_addchain(struct nft
|
||||||
|
struct nft_stats __percpu *stats;
|
||||||
|
struct net *net = ctx->net;
|
||||||
|
struct nft_chain *chain;
|
||||||
|
- unsigned int i;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
if (table->use == UINT_MAX)
|
||||||
|
@@ -1419,21 +1413,18 @@ static int nf_tables_addchain(struct nft
|
||||||
|
basechain->type = hook.type;
|
||||||
|
chain = &basechain->chain;
|
||||||
|
|
||||||
|
- for (i = 0; i < afi->nops; i++) {
|
||||||
|
- ops = &basechain->ops[i];
|
||||||
|
- ops->pf = family;
|
||||||
|
- ops->hooknum = hook.num;
|
||||||
|
- ops->priority = hook.priority;
|
||||||
|
- ops->priv = chain;
|
||||||
|
- ops->hook = afi->hooks[ops->hooknum];
|
||||||
|
- ops->dev = hook.dev;
|
||||||
|
- if (hookfn)
|
||||||
|
- ops->hook = hookfn;
|
||||||
|
- if (afi->hook_ops_init)
|
||||||
|
- afi->hook_ops_init(ops, i);
|
||||||
|
- if (basechain->type->type == NFT_CHAIN_T_NAT)
|
||||||
|
- ops->nat_hook = true;
|
||||||
|
- }
|
||||||
|
+ ops = &basechain->ops;
|
||||||
|
+ ops->pf = family;
|
||||||
|
+ ops->hooknum = hook.num;
|
||||||
|
+ ops->priority = hook.priority;
|
||||||
|
+ ops->priv = chain;
|
||||||
|
+ ops->hook = afi->hooks[ops->hooknum];
|
||||||
|
+ ops->dev = hook.dev;
|
||||||
|
+ if (hookfn)
|
||||||
|
+ ops->hook = hookfn;
|
||||||
|
+
|
||||||
|
+ if (basechain->type->type == NFT_CHAIN_T_NAT)
|
||||||
|
+ ops->nat_hook = true;
|
||||||
|
|
||||||
|
chain->flags |= NFT_BASE_CHAIN;
|
||||||
|
basechain->policy = policy;
|
||||||
|
@@ -1451,7 +1442,7 @@ static int nf_tables_addchain(struct nft
|
||||||
|
goto err1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- err = nf_tables_register_hooks(net, table, chain, afi->nops);
|
||||||
|
+ err = nf_tables_register_hook(net, table, chain);
|
||||||
|
if (err < 0)
|
||||||
|
goto err1;
|
||||||
|
|
||||||
|
@@ -1465,7 +1456,7 @@ static int nf_tables_addchain(struct nft
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
err2:
|
||||||
|
- nf_tables_unregister_hooks(net, table, chain, afi->nops);
|
||||||
|
+ nf_tables_unregister_hook(net, table, chain);
|
||||||
|
err1:
|
||||||
|
nf_tables_chain_destroy(chain);
|
||||||
|
|
||||||
|
@@ -1478,13 +1469,12 @@ static int nf_tables_updchain(struct nft
|
||||||
|
const struct nlattr * const *nla = ctx->nla;
|
||||||
|
struct nft_table *table = ctx->table;
|
||||||
|
struct nft_chain *chain = ctx->chain;
|
||||||
|
- struct nft_af_info *afi = ctx->afi;
|
||||||
|
struct nft_base_chain *basechain;
|
||||||
|
struct nft_stats *stats = NULL;
|
||||||
|
struct nft_chain_hook hook;
|
||||||
|
struct nf_hook_ops *ops;
|
||||||
|
struct nft_trans *trans;
|
||||||
|
- int err, i;
|
||||||
|
+ int err;
|
||||||
|
|
||||||
|
if (nla[NFTA_CHAIN_HOOK]) {
|
||||||
|
if (!nft_is_base_chain(chain))
|
||||||
|
@@ -1501,14 +1491,12 @@ static int nf_tables_updchain(struct nft
|
||||||
|
return -EBUSY;
|
||||||
|
}
|
||||||
|
|
||||||
|
- for (i = 0; i < afi->nops; i++) {
|
||||||
|
- ops = &basechain->ops[i];
|
||||||
|
- if (ops->hooknum != hook.num ||
|
||||||
|
- ops->priority != hook.priority ||
|
||||||
|
- ops->dev != hook.dev) {
|
||||||
|
- nft_chain_release_hook(&hook);
|
||||||
|
- return -EBUSY;
|
||||||
|
- }
|
||||||
|
+ ops = &basechain->ops;
|
||||||
|
+ if (ops->hooknum != hook.num ||
|
||||||
|
+ ops->priority != hook.priority ||
|
||||||
|
+ ops->dev != hook.dev) {
|
||||||
|
+ nft_chain_release_hook(&hook);
|
||||||
|
+ return -EBUSY;
|
||||||
|
}
|
||||||
|
nft_chain_release_hook(&hook);
|
||||||
|
}
|
||||||
|
@@ -5135,10 +5123,9 @@ static int nf_tables_commit(struct net *
|
||||||
|
case NFT_MSG_DELCHAIN:
|
||||||
|
list_del_rcu(&trans->ctx.chain->list);
|
||||||
|
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
|
||||||
|
- nf_tables_unregister_hooks(trans->ctx.net,
|
||||||
|
- trans->ctx.table,
|
||||||
|
- trans->ctx.chain,
|
||||||
|
- trans->ctx.afi->nops);
|
||||||
|
+ nf_tables_unregister_hook(trans->ctx.net,
|
||||||
|
+ trans->ctx.table,
|
||||||
|
+ trans->ctx.chain);
|
||||||
|
break;
|
||||||
|
case NFT_MSG_NEWRULE:
|
||||||
|
nft_clear(trans->ctx.net, nft_trans_rule(trans));
|
||||||
|
@@ -5275,10 +5262,9 @@ static int nf_tables_abort(struct net *n
|
||||||
|
} else {
|
||||||
|
trans->ctx.table->use--;
|
||||||
|
list_del_rcu(&trans->ctx.chain->list);
|
||||||
|
- nf_tables_unregister_hooks(trans->ctx.net,
|
||||||
|
- trans->ctx.table,
|
||||||
|
- trans->ctx.chain,
|
||||||
|
- trans->ctx.afi->nops);
|
||||||
|
+ nf_tables_unregister_hook(trans->ctx.net,
|
||||||
|
+ trans->ctx.table,
|
||||||
|
+ trans->ctx.chain);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case NFT_MSG_DELCHAIN:
|
||||||
|
@@ -5381,7 +5367,7 @@ int nft_chain_validate_hooks(const struc
|
||||||
|
if (nft_is_base_chain(chain)) {
|
||||||
|
basechain = nft_base_chain(chain);
|
||||||
|
|
||||||
|
- if ((1 << basechain->ops[0].hooknum) & hook_flags)
|
||||||
|
+ if ((1 << basechain->ops.hooknum) & hook_flags)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
@@ -5863,8 +5849,7 @@ int __nft_release_basechain(struct nft_c
|
||||||
|
|
||||||
|
BUG_ON(!nft_is_base_chain(ctx->chain));
|
||||||
|
|
||||||
|
- nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain,
|
||||||
|
- ctx->afi->nops);
|
||||||
|
+ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
|
||||||
|
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
|
||||||
|
list_del(&rule->list);
|
||||||
|
ctx->chain->use--;
|
||||||
|
@@ -5893,8 +5878,7 @@ static void __nft_release_afinfo(struct
|
||||||
|
|
||||||
|
list_for_each_entry_safe(table, nt, &afi->tables, list) {
|
||||||
|
list_for_each_entry(chain, &table->chains, list)
|
||||||
|
- nf_tables_unregister_hooks(net, table, chain,
|
||||||
|
- afi->nops);
|
||||||
|
+ nf_tables_unregister_hook(net, table, chain);
|
||||||
|
/* No packets are walking on these chains anymore. */
|
||||||
|
ctx.table = table;
|
||||||
|
list_for_each_entry(chain, &table->chains, list) {
|
||||||
|
--- a/net/netfilter/nf_tables_inet.c
|
||||||
|
+++ b/net/netfilter/nf_tables_inet.c
|
||||||
|
@@ -74,7 +74,6 @@ static struct nft_af_info nft_af_inet __
|
||||||
|
.family = NFPROTO_INET,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .nops = 1,
|
||||||
|
.hooks = {
|
||||||
|
[NF_INET_LOCAL_IN] = nft_do_chain_inet,
|
||||||
|
[NF_INET_LOCAL_OUT] = nft_inet_output,
|
||||||
|
--- a/net/netfilter/nf_tables_netdev.c
|
||||||
|
+++ b/net/netfilter/nf_tables_netdev.c
|
||||||
|
@@ -43,7 +43,6 @@ static struct nft_af_info nft_af_netdev
|
||||||
|
.nhooks = NF_NETDEV_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.flags = NFT_AF_NEEDS_DEV,
|
||||||
|
- .nops = 1,
|
||||||
|
.hooks = {
|
||||||
|
[NF_NETDEV_INGRESS] = nft_do_chain_netdev,
|
||||||
|
},
|
||||||
|
@@ -98,7 +97,7 @@ static void nft_netdev_event(unsigned lo
|
||||||
|
__nft_release_basechain(ctx);
|
||||||
|
break;
|
||||||
|
case NETDEV_CHANGENAME:
|
||||||
|
- if (dev->ifindex != basechain->ops[0].dev->ifindex)
|
||||||
|
+ if (dev->ifindex != basechain->ops.dev->ifindex)
|
||||||
|
return;
|
||||||
|
|
||||||
|
strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
|
||||||
|
--- a/net/netfilter/nft_compat.c
|
||||||
|
+++ b/net/netfilter/nft_compat.c
|
||||||
|
@@ -186,7 +186,7 @@ nft_target_set_tgchk_param(struct xt_tgc
|
||||||
|
if (nft_is_base_chain(ctx->chain)) {
|
||||||
|
const struct nft_base_chain *basechain =
|
||||||
|
nft_base_chain(ctx->chain);
|
||||||
|
- const struct nf_hook_ops *ops = &basechain->ops[0];
|
||||||
|
+ const struct nf_hook_ops *ops = &basechain->ops;
|
||||||
|
|
||||||
|
par->hook_mask = 1 << ops->hooknum;
|
||||||
|
} else {
|
||||||
|
@@ -317,7 +317,7 @@ static int nft_target_validate(const str
|
||||||
|
if (nft_is_base_chain(ctx->chain)) {
|
||||||
|
const struct nft_base_chain *basechain =
|
||||||
|
nft_base_chain(ctx->chain);
|
||||||
|
- const struct nf_hook_ops *ops = &basechain->ops[0];
|
||||||
|
+ const struct nf_hook_ops *ops = &basechain->ops;
|
||||||
|
|
||||||
|
hook_mask = 1 << ops->hooknum;
|
||||||
|
if (target->hooks && !(hook_mask & target->hooks))
|
||||||
|
@@ -414,7 +414,7 @@ nft_match_set_mtchk_param(struct xt_mtch
|
||||||
|
if (nft_is_base_chain(ctx->chain)) {
|
||||||
|
const struct nft_base_chain *basechain =
|
||||||
|
nft_base_chain(ctx->chain);
|
||||||
|
- const struct nf_hook_ops *ops = &basechain->ops[0];
|
||||||
|
+ const struct nf_hook_ops *ops = &basechain->ops;
|
||||||
|
|
||||||
|
par->hook_mask = 1 << ops->hooknum;
|
||||||
|
} else {
|
||||||
|
@@ -564,7 +564,7 @@ static int nft_match_validate(const stru
|
||||||
|
if (nft_is_base_chain(ctx->chain)) {
|
||||||
|
const struct nft_base_chain *basechain =
|
||||||
|
nft_base_chain(ctx->chain);
|
||||||
|
- const struct nf_hook_ops *ops = &basechain->ops[0];
|
||||||
|
+ const struct nf_hook_ops *ops = &basechain->ops;
|
||||||
|
|
||||||
|
hook_mask = 1 << ops->hooknum;
|
||||||
|
if (match->hooks && !(hook_mask & match->hooks))
|
|
@ -0,0 +1,171 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Mon, 27 Nov 2017 21:55:14 +0100
|
||||||
|
Subject: [PATCH] netfilter: move checksum indirection to struct nf_ipv6_ops
|
||||||
|
|
||||||
|
We cannot make a direct call to nf_ip6_checksum() because that would
|
||||||
|
result in autoloading the 'ipv6' module because of symbol dependencies.
|
||||||
|
Therefore, define the checksum indirection in nf_ipv6_ops, where it really
|
||||||
|
belongs.
|
||||||
|
|
||||||
|
For IPv4, we can indeed make a direct function call, which is faster,
|
||||||
|
given IPv4 is built-in in the networking code by default. Still,
|
||||||
|
CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define an empty inline
|
||||||
|
stub for IPv4 in that case.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
create mode 100644 net/netfilter/utils.c
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -311,8 +311,6 @@ struct nf_queue_entry;
|
||||||
|
|
||||||
|
struct nf_afinfo {
|
||||||
|
unsigned short family;
|
||||||
|
- __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
|
||||||
|
- unsigned int dataoff, u_int8_t protocol);
|
||||||
|
__sum16 (*checksum_partial)(struct sk_buff *skb,
|
||||||
|
unsigned int hook,
|
||||||
|
unsigned int dataoff,
|
||||||
|
@@ -333,20 +331,9 @@ static inline const struct nf_afinfo *nf
|
||||||
|
return rcu_dereference(nf_afinfo[family]);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline __sum16
|
||||||
|
-nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff,
|
||||||
|
- u_int8_t protocol, unsigned short family)
|
||||||
|
-{
|
||||||
|
- const struct nf_afinfo *afinfo;
|
||||||
|
- __sum16 csum = 0;
|
||||||
|
-
|
||||||
|
- rcu_read_lock();
|
||||||
|
- afinfo = nf_get_afinfo(family);
|
||||||
|
- if (afinfo)
|
||||||
|
- csum = afinfo->checksum(skb, hook, dataoff, protocol);
|
||||||
|
- rcu_read_unlock();
|
||||||
|
- return csum;
|
||||||
|
-}
|
||||||
|
+__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
+ unsigned int dataoff, u_int8_t protocol,
|
||||||
|
+ unsigned short family);
|
||||||
|
|
||||||
|
static inline __sum16
|
||||||
|
nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
--- a/include/linux/netfilter_ipv4.h
|
||||||
|
+++ b/include/linux/netfilter_ipv4.h
|
||||||
|
@@ -7,6 +7,16 @@
|
||||||
|
#include <uapi/linux/netfilter_ipv4.h>
|
||||||
|
|
||||||
|
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
|
||||||
|
+
|
||||||
|
+#ifdef CONFIG_INET
|
||||||
|
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol);
|
||||||
|
+#else
|
||||||
|
+static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
+ unsigned int dataoff, u_int8_t protocol)
|
||||||
|
+{
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+#endif /* CONFIG_INET */
|
||||||
|
+
|
||||||
|
#endif /*__LINUX_IP_NETFILTER_H*/
|
||||||
|
--- a/include/linux/netfilter_ipv6.h
|
||||||
|
+++ b/include/linux/netfilter_ipv6.h
|
||||||
|
@@ -19,6 +19,8 @@ struct nf_ipv6_ops {
|
||||||
|
void (*route_input)(struct sk_buff *skb);
|
||||||
|
int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
|
||||||
|
int (*output)(struct net *, struct sock *, struct sk_buff *));
|
||||||
|
+ __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
|
||||||
|
+ unsigned int dataoff, u_int8_t protocol);
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_NETFILTER
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -106,12 +106,6 @@ static int nf_br_reroute(struct net *net
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static __sum16 nf_br_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
- unsigned int dataoff, u_int8_t protocol)
|
||||||
|
-{
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, unsigned int len,
|
||||||
|
u_int8_t protocol)
|
||||||
|
@@ -127,7 +121,6 @@ static int nf_br_route(struct net *net,
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_br_afinfo = {
|
||||||
|
.family = AF_BRIDGE,
|
||||||
|
- .checksum = nf_br_checksum,
|
||||||
|
.checksum_partial = nf_br_checksum_partial,
|
||||||
|
.route = nf_br_route,
|
||||||
|
.saveroute = nf_br_saveroute,
|
||||||
|
--- a/net/ipv4/netfilter.c
|
||||||
|
+++ b/net/ipv4/netfilter.c
|
||||||
|
@@ -188,7 +188,6 @@ static int nf_ip_route(struct net *net,
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip_afinfo = {
|
||||||
|
.family = AF_INET,
|
||||||
|
- .checksum = nf_ip_checksum,
|
||||||
|
.checksum_partial = nf_ip_checksum_partial,
|
||||||
|
.route = nf_ip_route,
|
||||||
|
.saveroute = nf_ip_saveroute,
|
||||||
|
--- a/net/ipv6/netfilter.c
|
||||||
|
+++ b/net/ipv6/netfilter.c
|
||||||
|
@@ -193,12 +193,12 @@ static __sum16 nf_ip6_checksum_partial(s
|
||||||
|
static const struct nf_ipv6_ops ipv6ops = {
|
||||||
|
.chk_addr = ipv6_chk_addr,
|
||||||
|
.route_input = ip6_route_input,
|
||||||
|
- .fragment = ip6_fragment
|
||||||
|
+ .fragment = ip6_fragment,
|
||||||
|
+ .checksum = nf_ip6_checksum,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip6_afinfo = {
|
||||||
|
.family = AF_INET6,
|
||||||
|
- .checksum = nf_ip6_checksum,
|
||||||
|
.checksum_partial = nf_ip6_checksum_partial,
|
||||||
|
.route = nf_ip6_route,
|
||||||
|
.saveroute = nf_ip6_saveroute,
|
||||||
|
--- a/net/netfilter/Makefile
|
||||||
|
+++ b/net/netfilter/Makefile
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
# SPDX-License-Identifier: GPL-2.0
|
||||||
|
-netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
|
||||||
|
+netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
|
||||||
|
|
||||||
|
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
|
||||||
|
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/netfilter/utils.c
|
||||||
|
@@ -0,0 +1,26 @@
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/netfilter_ipv4.h>
|
||||||
|
+#include <linux/netfilter_ipv6.h>
|
||||||
|
+
|
||||||
|
+__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
+ unsigned int dataoff, u_int8_t protocol,
|
||||||
|
+ unsigned short family)
|
||||||
|
+{
|
||||||
|
+ const struct nf_ipv6_ops *v6ops;
|
||||||
|
+ __sum16 csum = 0;
|
||||||
|
+
|
||||||
|
+ switch (family) {
|
||||||
|
+ case AF_INET:
|
||||||
|
+ csum = nf_ip_checksum(skb, hook, dataoff, protocol);
|
||||||
|
+ break;
|
||||||
|
+ case AF_INET6:
|
||||||
|
+ v6ops = rcu_dereference(nf_ipv6_ops);
|
||||||
|
+ if (v6ops)
|
||||||
|
+ csum = v6ops->checksum(skb, hook, dataoff, protocol);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return csum;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_checksum);
|
|
@ -0,0 +1,204 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Wed, 20 Dec 2017 16:04:18 +0100
|
||||||
|
Subject: [PATCH] netfilter: move checksum_partial indirection to struct
|
||||||
|
nf_ipv6_ops
|
||||||
|
|
||||||
|
We cannot make a direct call to nf_ip6_checksum_partial() because that
|
||||||
|
would result in autoloading the 'ipv6' module because of symbol
|
||||||
|
dependencies. Therefore, define the checksum_partial indirection in
|
||||||
|
nf_ipv6_ops, where it really belongs.
|
||||||
|
|
||||||
|
For IPv4, we can indeed make a direct function call, which is faster,
|
||||||
|
given IPv4 is built-in in the networking code by default. Still,
|
||||||
|
CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define an empty inline
|
||||||
|
stub for IPv4 in that case.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -311,11 +311,6 @@ struct nf_queue_entry;
|
||||||
|
|
||||||
|
struct nf_afinfo {
|
||||||
|
unsigned short family;
|
||||||
|
- __sum16 (*checksum_partial)(struct sk_buff *skb,
|
||||||
|
- unsigned int hook,
|
||||||
|
- unsigned int dataoff,
|
||||||
|
- unsigned int len,
|
||||||
|
- u_int8_t protocol);
|
||||||
|
int (*route)(struct net *net, struct dst_entry **dst,
|
||||||
|
struct flowi *fl, bool strict);
|
||||||
|
void (*saveroute)(const struct sk_buff *skb,
|
||||||
|
@@ -335,22 +330,9 @@ __sum16 nf_checksum(struct sk_buff *skb,
|
||||||
|
unsigned int dataoff, u_int8_t protocol,
|
||||||
|
unsigned short family);
|
||||||
|
|
||||||
|
-static inline __sum16
|
||||||
|
-nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
- unsigned int dataoff, unsigned int len,
|
||||||
|
- u_int8_t protocol, unsigned short family)
|
||||||
|
-{
|
||||||
|
- const struct nf_afinfo *afinfo;
|
||||||
|
- __sum16 csum = 0;
|
||||||
|
-
|
||||||
|
- rcu_read_lock();
|
||||||
|
- afinfo = nf_get_afinfo(family);
|
||||||
|
- if (afinfo)
|
||||||
|
- csum = afinfo->checksum_partial(skb, hook, dataoff, len,
|
||||||
|
- protocol);
|
||||||
|
- rcu_read_unlock();
|
||||||
|
- return csum;
|
||||||
|
-}
|
||||||
|
+__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
+ unsigned int dataoff, unsigned int len,
|
||||||
|
+ u_int8_t protocol, unsigned short family);
|
||||||
|
|
||||||
|
int nf_register_afinfo(const struct nf_afinfo *afinfo);
|
||||||
|
void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
|
||||||
|
--- a/include/linux/netfilter_ipv4.h
|
||||||
|
+++ b/include/linux/netfilter_ipv4.h
|
||||||
|
@@ -11,12 +11,23 @@ int ip_route_me_harder(struct net *net,
|
||||||
|
#ifdef CONFIG_INET
|
||||||
|
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol);
|
||||||
|
+__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
+ unsigned int dataoff, unsigned int len,
|
||||||
|
+ u_int8_t protocol);
|
||||||
|
#else
|
||||||
|
static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
+static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb,
|
||||||
|
+ unsigned int hook,
|
||||||
|
+ unsigned int dataoff,
|
||||||
|
+ unsigned int len,
|
||||||
|
+ u_int8_t protocol)
|
||||||
|
+{
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
#endif /* CONFIG_INET */
|
||||||
|
|
||||||
|
#endif /*__LINUX_IP_NETFILTER_H*/
|
||||||
|
--- a/include/linux/netfilter_ipv6.h
|
||||||
|
+++ b/include/linux/netfilter_ipv6.h
|
||||||
|
@@ -21,6 +21,9 @@ struct nf_ipv6_ops {
|
||||||
|
int (*output)(struct net *, struct sock *, struct sk_buff *));
|
||||||
|
__sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol);
|
||||||
|
+ __sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
|
||||||
|
+ unsigned int dataoff, unsigned int len,
|
||||||
|
+ u_int8_t protocol);
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_NETFILTER
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -106,13 +106,6 @@ static int nf_br_reroute(struct net *net
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
- unsigned int dataoff, unsigned int len,
|
||||||
|
- u_int8_t protocol)
|
||||||
|
-{
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static int nf_br_route(struct net *net, struct dst_entry **dst,
|
||||||
|
struct flowi *fl, bool strict __always_unused)
|
||||||
|
{
|
||||||
|
@@ -121,7 +114,6 @@ static int nf_br_route(struct net *net,
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_br_afinfo = {
|
||||||
|
.family = AF_BRIDGE,
|
||||||
|
- .checksum_partial = nf_br_checksum_partial,
|
||||||
|
.route = nf_br_route,
|
||||||
|
.saveroute = nf_br_saveroute,
|
||||||
|
.reroute = nf_br_reroute,
|
||||||
|
--- a/net/ipv4/netfilter.c
|
||||||
|
+++ b/net/ipv4/netfilter.c
|
||||||
|
@@ -155,9 +155,9 @@ __sum16 nf_ip_checksum(struct sk_buff *s
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(nf_ip_checksum);
|
||||||
|
|
||||||
|
-static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
- unsigned int dataoff, unsigned int len,
|
||||||
|
- u_int8_t protocol)
|
||||||
|
+__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
+ unsigned int dataoff, unsigned int len,
|
||||||
|
+ u_int8_t protocol)
|
||||||
|
{
|
||||||
|
const struct iphdr *iph = ip_hdr(skb);
|
||||||
|
__sum16 csum = 0;
|
||||||
|
@@ -175,6 +175,7 @@ static __sum16 nf_ip_checksum_partial(st
|
||||||
|
}
|
||||||
|
return csum;
|
||||||
|
}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
|
||||||
|
|
||||||
|
static int nf_ip_route(struct net *net, struct dst_entry **dst,
|
||||||
|
struct flowi *fl, bool strict __always_unused)
|
||||||
|
@@ -188,7 +189,6 @@ static int nf_ip_route(struct net *net,
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip_afinfo = {
|
||||||
|
.family = AF_INET,
|
||||||
|
- .checksum_partial = nf_ip_checksum_partial,
|
||||||
|
.route = nf_ip_route,
|
||||||
|
.saveroute = nf_ip_saveroute,
|
||||||
|
.reroute = nf_ip_reroute,
|
||||||
|
--- a/net/ipv6/netfilter.c
|
||||||
|
+++ b/net/ipv6/netfilter.c
|
||||||
|
@@ -191,15 +191,15 @@ static __sum16 nf_ip6_checksum_partial(s
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct nf_ipv6_ops ipv6ops = {
|
||||||
|
- .chk_addr = ipv6_chk_addr,
|
||||||
|
- .route_input = ip6_route_input,
|
||||||
|
- .fragment = ip6_fragment,
|
||||||
|
- .checksum = nf_ip6_checksum,
|
||||||
|
+ .chk_addr = ipv6_chk_addr,
|
||||||
|
+ .route_input = ip6_route_input,
|
||||||
|
+ .fragment = ip6_fragment,
|
||||||
|
+ .checksum = nf_ip6_checksum,
|
||||||
|
+ .checksum_partial = nf_ip6_checksum_partial,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip6_afinfo = {
|
||||||
|
.family = AF_INET6,
|
||||||
|
- .checksum_partial = nf_ip6_checksum_partial,
|
||||||
|
.route = nf_ip6_route,
|
||||||
|
.saveroute = nf_ip6_saveroute,
|
||||||
|
.reroute = nf_ip6_reroute,
|
||||||
|
--- a/net/netfilter/utils.c
|
||||||
|
+++ b/net/netfilter/utils.c
|
||||||
|
@@ -24,3 +24,27 @@ __sum16 nf_checksum(struct sk_buff *skb,
|
||||||
|
return csum;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_checksum);
|
||||||
|
+
|
||||||
|
+__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
+ unsigned int dataoff, unsigned int len,
|
||||||
|
+ u_int8_t protocol, unsigned short family)
|
||||||
|
+{
|
||||||
|
+ const struct nf_ipv6_ops *v6ops;
|
||||||
|
+ __sum16 csum = 0;
|
||||||
|
+
|
||||||
|
+ switch (family) {
|
||||||
|
+ case AF_INET:
|
||||||
|
+ csum = nf_ip_checksum_partial(skb, hook, dataoff, len,
|
||||||
|
+ protocol);
|
||||||
|
+ break;
|
||||||
|
+ case AF_INET6:
|
||||||
|
+ v6ops = rcu_dereference(nf_ipv6_ops);
|
||||||
|
+ if (v6ops)
|
||||||
|
+ csum = v6ops->checksum_partial(skb, hook, dataoff, len,
|
||||||
|
+ protocol);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return csum;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_checksum_partial);
|
|
@ -0,0 +1,232 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Wed, 20 Dec 2017 16:12:55 +0100
|
||||||
|
Subject: [PATCH] netfilter: remove saveroute indirection in struct nf_afinfo
|
||||||
|
|
||||||
|
This is only used by nf_queue.c, and this function comes with no symbol
|
||||||
|
dependencies on IPv6; it just refers to structure layouts. Therefore,
|
||||||
|
we can replace it with a direct function call from where it belongs.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -313,8 +313,6 @@ struct nf_afinfo {
|
||||||
|
unsigned short family;
|
||||||
|
int (*route)(struct net *net, struct dst_entry **dst,
|
||||||
|
struct flowi *fl, bool strict);
|
||||||
|
- void (*saveroute)(const struct sk_buff *skb,
|
||||||
|
- struct nf_queue_entry *entry);
|
||||||
|
int (*reroute)(struct net *net, struct sk_buff *skb,
|
||||||
|
const struct nf_queue_entry *entry);
|
||||||
|
int route_key_size;
|
||||||
|
--- a/include/linux/netfilter_ipv4.h
|
||||||
|
+++ b/include/linux/netfilter_ipv4.h
|
||||||
|
@@ -6,6 +6,16 @@
|
||||||
|
|
||||||
|
#include <uapi/linux/netfilter_ipv4.h>
|
||||||
|
|
||||||
|
+/* Extra routing may be needed on local out, as the QUEUE target never returns
|
||||||
|
+ * control to the table.
|
||||||
|
+ */
|
||||||
|
+struct ip_rt_info {
|
||||||
|
+ __be32 daddr;
|
||||||
|
+ __be32 saddr;
|
||||||
|
+ u_int8_t tos;
|
||||||
|
+ u_int32_t mark;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
|
||||||
|
|
||||||
|
#ifdef CONFIG_INET
|
||||||
|
--- a/include/linux/netfilter_ipv6.h
|
||||||
|
+++ b/include/linux/netfilter_ipv6.h
|
||||||
|
@@ -9,6 +9,15 @@
|
||||||
|
|
||||||
|
#include <uapi/linux/netfilter_ipv6.h>
|
||||||
|
|
||||||
|
+/* Extra routing may be needed on local out, as the QUEUE target never returns
|
||||||
|
+ * control to the table.
|
||||||
|
+ */
|
||||||
|
+struct ip6_rt_info {
|
||||||
|
+ struct in6_addr daddr;
|
||||||
|
+ struct in6_addr saddr;
|
||||||
|
+ u_int32_t mark;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Hook functions for ipv6 to allow xt_* modules to be built-in even
|
||||||
|
* if IPv6 is a module.
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -95,11 +95,6 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_BR_POST_ROUTING),
|
||||||
|
};
|
||||||
|
|
||||||
|
-static void nf_br_saveroute(const struct sk_buff *skb,
|
||||||
|
- struct nf_queue_entry *entry)
|
||||||
|
-{
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static int nf_br_reroute(struct net *net, struct sk_buff *skb,
|
||||||
|
const struct nf_queue_entry *entry)
|
||||||
|
{
|
||||||
|
@@ -115,7 +110,6 @@ static int nf_br_route(struct net *net,
|
||||||
|
static const struct nf_afinfo nf_br_afinfo = {
|
||||||
|
.family = AF_BRIDGE,
|
||||||
|
.route = nf_br_route,
|
||||||
|
- .saveroute = nf_br_saveroute,
|
||||||
|
.reroute = nf_br_reroute,
|
||||||
|
.route_key_size = 0,
|
||||||
|
};
|
||||||
|
--- a/net/ipv4/netfilter.c
|
||||||
|
+++ b/net/ipv4/netfilter.c
|
||||||
|
@@ -80,33 +80,6 @@ int ip_route_me_harder(struct net *net,
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(ip_route_me_harder);
|
||||||
|
|
||||||
|
-/*
|
||||||
|
- * Extra routing may needed on local out, as the QUEUE target never
|
||||||
|
- * returns control to the table.
|
||||||
|
- */
|
||||||
|
-
|
||||||
|
-struct ip_rt_info {
|
||||||
|
- __be32 daddr;
|
||||||
|
- __be32 saddr;
|
||||||
|
- u_int8_t tos;
|
||||||
|
- u_int32_t mark;
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-static void nf_ip_saveroute(const struct sk_buff *skb,
|
||||||
|
- struct nf_queue_entry *entry)
|
||||||
|
-{
|
||||||
|
- struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
|
||||||
|
-
|
||||||
|
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
|
||||||
|
- const struct iphdr *iph = ip_hdr(skb);
|
||||||
|
-
|
||||||
|
- rt_info->tos = iph->tos;
|
||||||
|
- rt_info->daddr = iph->daddr;
|
||||||
|
- rt_info->saddr = iph->saddr;
|
||||||
|
- rt_info->mark = skb->mark;
|
||||||
|
- }
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
|
||||||
|
const struct nf_queue_entry *entry)
|
||||||
|
{
|
||||||
|
@@ -190,7 +163,6 @@ static int nf_ip_route(struct net *net,
|
||||||
|
static const struct nf_afinfo nf_ip_afinfo = {
|
||||||
|
.family = AF_INET,
|
||||||
|
.route = nf_ip_route,
|
||||||
|
- .saveroute = nf_ip_saveroute,
|
||||||
|
.reroute = nf_ip_reroute,
|
||||||
|
.route_key_size = sizeof(struct ip_rt_info),
|
||||||
|
};
|
||||||
|
--- a/net/ipv6/netfilter.c
|
||||||
|
+++ b/net/ipv6/netfilter.c
|
||||||
|
@@ -69,31 +69,6 @@ int ip6_route_me_harder(struct net *net,
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(ip6_route_me_harder);
|
||||||
|
|
||||||
|
-/*
|
||||||
|
- * Extra routing may needed on local out, as the QUEUE target never
|
||||||
|
- * returns control to the table.
|
||||||
|
- */
|
||||||
|
-
|
||||||
|
-struct ip6_rt_info {
|
||||||
|
- struct in6_addr daddr;
|
||||||
|
- struct in6_addr saddr;
|
||||||
|
- u_int32_t mark;
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-static void nf_ip6_saveroute(const struct sk_buff *skb,
|
||||||
|
- struct nf_queue_entry *entry)
|
||||||
|
-{
|
||||||
|
- struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
|
||||||
|
-
|
||||||
|
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
|
||||||
|
- const struct ipv6hdr *iph = ipv6_hdr(skb);
|
||||||
|
-
|
||||||
|
- rt_info->daddr = iph->daddr;
|
||||||
|
- rt_info->saddr = iph->saddr;
|
||||||
|
- rt_info->mark = skb->mark;
|
||||||
|
- }
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
|
||||||
|
const struct nf_queue_entry *entry)
|
||||||
|
{
|
||||||
|
@@ -201,7 +176,6 @@ static const struct nf_ipv6_ops ipv6ops
|
||||||
|
static const struct nf_afinfo nf_ip6_afinfo = {
|
||||||
|
.family = AF_INET6,
|
||||||
|
.route = nf_ip6_route,
|
||||||
|
- .saveroute = nf_ip6_saveroute,
|
||||||
|
.reroute = nf_ip6_reroute,
|
||||||
|
.route_key_size = sizeof(struct ip6_rt_info),
|
||||||
|
};
|
||||||
|
--- a/net/netfilter/nf_queue.c
|
||||||
|
+++ b/net/netfilter/nf_queue.c
|
||||||
|
@@ -10,6 +10,8 @@
|
||||||
|
#include <linux/proc_fs.h>
|
||||||
|
#include <linux/skbuff.h>
|
||||||
|
#include <linux/netfilter.h>
|
||||||
|
+#include <linux/netfilter_ipv4.h>
|
||||||
|
+#include <linux/netfilter_ipv6.h>
|
||||||
|
#include <linux/netfilter_bridge.h>
|
||||||
|
#include <linux/seq_file.h>
|
||||||
|
#include <linux/rcupdate.h>
|
||||||
|
@@ -108,6 +110,35 @@ void nf_queue_nf_hook_drop(struct net *n
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
|
||||||
|
|
||||||
|
+static void nf_ip_saveroute(const struct sk_buff *skb,
|
||||||
|
+ struct nf_queue_entry *entry)
|
||||||
|
+{
|
||||||
|
+ struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
|
||||||
|
+
|
||||||
|
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
|
||||||
|
+ const struct iphdr *iph = ip_hdr(skb);
|
||||||
|
+
|
||||||
|
+ rt_info->tos = iph->tos;
|
||||||
|
+ rt_info->daddr = iph->daddr;
|
||||||
|
+ rt_info->saddr = iph->saddr;
|
||||||
|
+ rt_info->mark = skb->mark;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_ip6_saveroute(const struct sk_buff *skb,
|
||||||
|
+ struct nf_queue_entry *entry)
|
||||||
|
+{
|
||||||
|
+ struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
|
||||||
|
+
|
||||||
|
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
|
||||||
|
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
|
||||||
|
+
|
||||||
|
+ rt_info->daddr = iph->daddr;
|
||||||
|
+ rt_info->saddr = iph->saddr;
|
||||||
|
+ rt_info->mark = skb->mark;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
|
||||||
|
const struct nf_hook_entries *entries,
|
||||||
|
unsigned int index, unsigned int queuenum)
|
||||||
|
@@ -144,7 +175,16 @@ static int __nf_queue(struct sk_buff *sk
|
||||||
|
|
||||||
|
nf_queue_entry_get_refs(entry);
|
||||||
|
skb_dst_force(skb);
|
||||||
|
- afinfo->saveroute(skb, entry);
|
||||||
|
+
|
||||||
|
+ switch (entry->state.pf) {
|
||||||
|
+ case AF_INET:
|
||||||
|
+ nf_ip_saveroute(skb, entry);
|
||||||
|
+ break;
|
||||||
|
+ case AF_INET6:
|
||||||
|
+ nf_ip6_saveroute(skb, entry);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
status = qh->outfn(entry, queuenum);
|
||||||
|
|
||||||
|
if (status < 0) {
|
|
@ -0,0 +1,349 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Mon, 27 Nov 2017 22:29:52 +0100
|
||||||
|
Subject: [PATCH] netfilter: move route indirection to struct nf_ipv6_ops
|
||||||
|
|
||||||
|
We cannot make a direct call to nf_ip6_route() because that would result
|
||||||
|
in autoloading the 'ipv6' module because of symbol dependencies.
|
||||||
|
Therefore, define route indirection in nf_ipv6_ops where this really
|
||||||
|
belongs.
|
||||||
|
|
||||||
|
For IPv4, we can indeed make a direct function call, which is faster,
|
||||||
|
given IPv4 is built into the networking code by default. Still,
|
||||||
|
CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define an empty inline
|
||||||
|
stub for IPv4 in such a case.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -311,8 +311,6 @@ struct nf_queue_entry;
|
||||||
|
|
||||||
|
struct nf_afinfo {
|
||||||
|
unsigned short family;
|
||||||
|
- int (*route)(struct net *net, struct dst_entry **dst,
|
||||||
|
- struct flowi *fl, bool strict);
|
||||||
|
int (*reroute)(struct net *net, struct sk_buff *skb,
|
||||||
|
const struct nf_queue_entry *entry);
|
||||||
|
int route_key_size;
|
||||||
|
@@ -331,6 +329,8 @@ __sum16 nf_checksum(struct sk_buff *skb,
|
||||||
|
__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, unsigned int len,
|
||||||
|
u_int8_t protocol, unsigned short family);
|
||||||
|
+int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
||||||
|
+ bool strict, unsigned short family);
|
||||||
|
|
||||||
|
int nf_register_afinfo(const struct nf_afinfo *afinfo);
|
||||||
|
void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
|
||||||
|
--- a/include/linux/netfilter_ipv4.h
|
||||||
|
+++ b/include/linux/netfilter_ipv4.h
|
||||||
|
@@ -24,6 +24,8 @@ __sum16 nf_ip_checksum(struct sk_buff *s
|
||||||
|
__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, unsigned int len,
|
||||||
|
u_int8_t protocol);
|
||||||
|
+int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
||||||
|
+ bool strict);
|
||||||
|
#else
|
||||||
|
static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol)
|
||||||
|
@@ -38,6 +40,11 @@ static inline __sum16 nf_ip_checksum_par
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
+static inline int nf_ip_route(struct net *net, struct dst_entry **dst,
|
||||||
|
+ struct flowi *fl, bool strict)
|
||||||
|
+{
|
||||||
|
+ return -EOPNOTSUPP;
|
||||||
|
+}
|
||||||
|
#endif /* CONFIG_INET */
|
||||||
|
|
||||||
|
#endif /*__LINUX_IP_NETFILTER_H*/
|
||||||
|
--- a/include/linux/netfilter_ipv6.h
|
||||||
|
+++ b/include/linux/netfilter_ipv6.h
|
||||||
|
@@ -33,6 +33,8 @@ struct nf_ipv6_ops {
|
||||||
|
__sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, unsigned int len,
|
||||||
|
u_int8_t protocol);
|
||||||
|
+ int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
||||||
|
+ bool strict);
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_NETFILTER
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -101,15 +101,8 @@ static int nf_br_reroute(struct net *net
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int nf_br_route(struct net *net, struct dst_entry **dst,
|
||||||
|
- struct flowi *fl, bool strict __always_unused)
|
||||||
|
-{
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static const struct nf_afinfo nf_br_afinfo = {
|
||||||
|
.family = AF_BRIDGE,
|
||||||
|
- .route = nf_br_route,
|
||||||
|
.reroute = nf_br_reroute,
|
||||||
|
.route_key_size = 0,
|
||||||
|
};
|
||||||
|
--- a/net/ipv4/netfilter.c
|
||||||
|
+++ b/net/ipv4/netfilter.c
|
||||||
|
@@ -150,8 +150,8 @@ __sum16 nf_ip_checksum_partial(struct sk
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
|
||||||
|
|
||||||
|
-static int nf_ip_route(struct net *net, struct dst_entry **dst,
|
||||||
|
- struct flowi *fl, bool strict __always_unused)
|
||||||
|
+int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
||||||
|
+ bool strict __always_unused)
|
||||||
|
{
|
||||||
|
struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
|
||||||
|
if (IS_ERR(rt))
|
||||||
|
@@ -159,10 +159,10 @@ static int nf_ip_route(struct net *net,
|
||||||
|
*dst = &rt->dst;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_ip_route);
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip_afinfo = {
|
||||||
|
.family = AF_INET,
|
||||||
|
- .route = nf_ip_route,
|
||||||
|
.reroute = nf_ip_reroute,
|
||||||
|
.route_key_size = sizeof(struct ip_rt_info),
|
||||||
|
};
|
||||||
|
--- a/net/ipv6/netfilter.c
|
||||||
|
+++ b/net/ipv6/netfilter.c
|
||||||
|
@@ -171,11 +171,11 @@ static const struct nf_ipv6_ops ipv6ops
|
||||||
|
.fragment = ip6_fragment,
|
||||||
|
.checksum = nf_ip6_checksum,
|
||||||
|
.checksum_partial = nf_ip6_checksum_partial,
|
||||||
|
+ .route = nf_ip6_route,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip6_afinfo = {
|
||||||
|
.family = AF_INET6,
|
||||||
|
- .route = nf_ip6_route,
|
||||||
|
.reroute = nf_ip6_reroute,
|
||||||
|
.route_key_size = sizeof(struct ip6_rt_info),
|
||||||
|
};
|
||||||
|
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
|
||||||
|
@@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const st
|
||||||
|
{
|
||||||
|
const struct net_device *dev = NULL;
|
||||||
|
const struct nf_ipv6_ops *v6ops;
|
||||||
|
- const struct nf_afinfo *afinfo;
|
||||||
|
int route_err, addrtype;
|
||||||
|
struct rt6_info *rt;
|
||||||
|
struct flowi6 fl6 = {
|
||||||
|
@@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const st
|
||||||
|
};
|
||||||
|
u32 ret = 0;
|
||||||
|
|
||||||
|
- afinfo = nf_get_afinfo(NFPROTO_IPV6);
|
||||||
|
- if (!afinfo)
|
||||||
|
+ v6ops = nf_get_ipv6_ops();
|
||||||
|
+ if (!v6ops)
|
||||||
|
return RTN_UNREACHABLE;
|
||||||
|
|
||||||
|
if (priv->flags & NFTA_FIB_F_IIF)
|
||||||
|
@@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const st
|
||||||
|
|
||||||
|
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
|
||||||
|
|
||||||
|
- v6ops = nf_get_ipv6_ops();
|
||||||
|
- if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
|
||||||
|
+ if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
|
||||||
|
ret = RTN_LOCAL;
|
||||||
|
|
||||||
|
- route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
|
||||||
|
- flowi6_to_flowi(&fl6), false);
|
||||||
|
+ route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
|
||||||
|
+ flowi6_to_flowi(&fl6), false);
|
||||||
|
if (route_err)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_conntrack_h323_main.c
|
||||||
|
+++ b/net/netfilter/nf_conntrack_h323_main.c
|
||||||
|
@@ -24,6 +24,7 @@
|
||||||
|
#include <linux/skbuff.h>
|
||||||
|
#include <net/route.h>
|
||||||
|
#include <net/ip6_route.h>
|
||||||
|
+#include <linux/netfilter_ipv6.h>
|
||||||
|
|
||||||
|
#include <net/netfilter/nf_conntrack.h>
|
||||||
|
#include <net/netfilter/nf_conntrack_core.h>
|
||||||
|
@@ -732,14 +733,8 @@ static int callforward_do_filter(struct
|
||||||
|
const union nf_inet_addr *dst,
|
||||||
|
u_int8_t family)
|
||||||
|
{
|
||||||
|
- const struct nf_afinfo *afinfo;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
- /* rcu_read_lock()ed by nf_hook_thresh */
|
||||||
|
- afinfo = nf_get_afinfo(family);
|
||||||
|
- if (!afinfo)
|
||||||
|
- return 0;
|
||||||
|
-
|
||||||
|
switch (family) {
|
||||||
|
case AF_INET: {
|
||||||
|
struct flowi4 fl1, fl2;
|
||||||
|
@@ -750,10 +745,10 @@ static int callforward_do_filter(struct
|
||||||
|
|
||||||
|
memset(&fl2, 0, sizeof(fl2));
|
||||||
|
fl2.daddr = dst->ip;
|
||||||
|
- if (!afinfo->route(net, (struct dst_entry **)&rt1,
|
||||||
|
- flowi4_to_flowi(&fl1), false)) {
|
||||||
|
- if (!afinfo->route(net, (struct dst_entry **)&rt2,
|
||||||
|
- flowi4_to_flowi(&fl2), false)) {
|
||||||
|
+ if (!nf_ip_route(net, (struct dst_entry **)&rt1,
|
||||||
|
+ flowi4_to_flowi(&fl1), false)) {
|
||||||
|
+ if (!nf_ip_route(net, (struct dst_entry **)&rt2,
|
||||||
|
+ flowi4_to_flowi(&fl2), false)) {
|
||||||
|
if (rt_nexthop(rt1, fl1.daddr) ==
|
||||||
|
rt_nexthop(rt2, fl2.daddr) &&
|
||||||
|
rt1->dst.dev == rt2->dst.dev)
|
||||||
|
@@ -766,18 +761,23 @@ static int callforward_do_filter(struct
|
||||||
|
}
|
||||||
|
#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
case AF_INET6: {
|
||||||
|
- struct flowi6 fl1, fl2;
|
||||||
|
+ const struct nf_ipv6_ops *v6ops;
|
||||||
|
struct rt6_info *rt1, *rt2;
|
||||||
|
+ struct flowi6 fl1, fl2;
|
||||||
|
+
|
||||||
|
+ v6ops = nf_get_ipv6_ops();
|
||||||
|
+ if (!v6ops)
|
||||||
|
+ return 0;
|
||||||
|
|
||||||
|
memset(&fl1, 0, sizeof(fl1));
|
||||||
|
fl1.daddr = src->in6;
|
||||||
|
|
||||||
|
memset(&fl2, 0, sizeof(fl2));
|
||||||
|
fl2.daddr = dst->in6;
|
||||||
|
- if (!afinfo->route(net, (struct dst_entry **)&rt1,
|
||||||
|
- flowi6_to_flowi(&fl1), false)) {
|
||||||
|
- if (!afinfo->route(net, (struct dst_entry **)&rt2,
|
||||||
|
- flowi6_to_flowi(&fl2), false)) {
|
||||||
|
+ if (!v6ops->route(net, (struct dst_entry **)&rt1,
|
||||||
|
+ flowi6_to_flowi(&fl1), false)) {
|
||||||
|
+ if (!v6ops->route(net, (struct dst_entry **)&rt2,
|
||||||
|
+ flowi6_to_flowi(&fl2), false)) {
|
||||||
|
if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
|
||||||
|
rt6_nexthop(rt2, &fl2.daddr)) &&
|
||||||
|
rt1->dst.dev == rt2->dst.dev)
|
||||||
|
--- a/net/netfilter/nft_rt.c
|
||||||
|
+++ b/net/netfilter/nft_rt.c
|
||||||
|
@@ -27,7 +27,7 @@ static u16 get_tcpmss(const struct nft_p
|
||||||
|
{
|
||||||
|
u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
|
||||||
|
const struct sk_buff *skb = pkt->skb;
|
||||||
|
- const struct nf_afinfo *ai;
|
||||||
|
+ struct dst_entry *dst = NULL;
|
||||||
|
struct flowi fl;
|
||||||
|
|
||||||
|
memset(&fl, 0, sizeof(fl));
|
||||||
|
@@ -43,15 +43,10 @@ static u16 get_tcpmss(const struct nft_p
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
- ai = nf_get_afinfo(nft_pf(pkt));
|
||||||
|
- if (ai) {
|
||||||
|
- struct dst_entry *dst = NULL;
|
||||||
|
-
|
||||||
|
- ai->route(nft_net(pkt), &dst, &fl, false);
|
||||||
|
- if (dst) {
|
||||||
|
- mtu = min(mtu, dst_mtu(dst));
|
||||||
|
- dst_release(dst);
|
||||||
|
- }
|
||||||
|
+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
|
||||||
|
+ if (dst) {
|
||||||
|
+ mtu = min(mtu, dst_mtu(dst));
|
||||||
|
+ dst_release(dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mtu <= minlen || mtu > 0xffff)
|
||||||
|
--- a/net/netfilter/utils.c
|
||||||
|
+++ b/net/netfilter/utils.c
|
||||||
|
@@ -48,3 +48,24 @@ __sum16 nf_checksum_partial(struct sk_bu
|
||||||
|
return csum;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_checksum_partial);
|
||||||
|
+
|
||||||
|
+int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
||||||
|
+ bool strict, unsigned short family)
|
||||||
|
+{
|
||||||
|
+ const struct nf_ipv6_ops *v6ops;
|
||||||
|
+ int ret = 0;
|
||||||
|
+
|
||||||
|
+ switch (family) {
|
||||||
|
+ case AF_INET:
|
||||||
|
+ ret = nf_ip_route(net, dst, fl, strict);
|
||||||
|
+ break;
|
||||||
|
+ case AF_INET6:
|
||||||
|
+ v6ops = rcu_dereference(nf_ipv6_ops);
|
||||||
|
+ if (v6ops)
|
||||||
|
+ ret = v6ops->route(net, dst, fl, strict);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_route);
|
||||||
|
--- a/net/netfilter/xt_TCPMSS.c
|
||||||
|
+++ b/net/netfilter/xt_TCPMSS.c
|
||||||
|
@@ -48,7 +48,6 @@ static u_int32_t tcpmss_reverse_mtu(stru
|
||||||
|
unsigned int family)
|
||||||
|
{
|
||||||
|
struct flowi fl;
|
||||||
|
- const struct nf_afinfo *ai;
|
||||||
|
struct rtable *rt = NULL;
|
||||||
|
u_int32_t mtu = ~0U;
|
||||||
|
|
||||||
|
@@ -62,10 +61,8 @@ static u_int32_t tcpmss_reverse_mtu(stru
|
||||||
|
memset(fl6, 0, sizeof(*fl6));
|
||||||
|
fl6->daddr = ipv6_hdr(skb)->saddr;
|
||||||
|
}
|
||||||
|
- ai = nf_get_afinfo(family);
|
||||||
|
- if (ai != NULL)
|
||||||
|
- ai->route(net, (struct dst_entry **)&rt, &fl, false);
|
||||||
|
|
||||||
|
+ nf_route(net, (struct dst_entry **)&rt, &fl, false, family);
|
||||||
|
if (rt != NULL) {
|
||||||
|
mtu = dst_mtu(&rt->dst);
|
||||||
|
dst_release(&rt->dst);
|
||||||
|
--- a/net/netfilter/xt_addrtype.c
|
||||||
|
+++ b/net/netfilter/xt_addrtype.c
|
||||||
|
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip6t_addrtype");
|
||||||
|
static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
|
||||||
|
const struct in6_addr *addr, u16 mask)
|
||||||
|
{
|
||||||
|
- const struct nf_afinfo *afinfo;
|
||||||
|
+ const struct nf_ipv6_ops *v6ops;
|
||||||
|
struct flowi6 flow;
|
||||||
|
struct rt6_info *rt;
|
||||||
|
u32 ret = 0;
|
||||||
|
@@ -47,17 +47,14 @@ static u32 match_lookup_rt6(struct net *
|
||||||
|
if (dev)
|
||||||
|
flow.flowi6_oif = dev->ifindex;
|
||||||
|
|
||||||
|
- afinfo = nf_get_afinfo(NFPROTO_IPV6);
|
||||||
|
- if (afinfo != NULL) {
|
||||||
|
- const struct nf_ipv6_ops *v6ops;
|
||||||
|
-
|
||||||
|
+ v6ops = nf_get_ipv6_ops();
|
||||||
|
+ if (v6ops) {
|
||||||
|
if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
|
||||||
|
- v6ops = nf_get_ipv6_ops();
|
||||||
|
- if (v6ops && v6ops->chk_addr(net, addr, dev, true))
|
||||||
|
+ if (v6ops->chk_addr(net, addr, dev, true))
|
||||||
|
ret = XT_ADDRTYPE_LOCAL;
|
||||||
|
}
|
||||||
|
- route_err = afinfo->route(net, (struct dst_entry **)&rt,
|
||||||
|
- flowi6_to_flowi(&flow), false);
|
||||||
|
+ route_err = v6ops->route(net, (struct dst_entry **)&rt,
|
||||||
|
+ flowi6_to_flowi(&flow), false);
|
||||||
|
} else {
|
||||||
|
route_err = 1;
|
||||||
|
}
|
|
@ -0,0 +1,223 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Mon, 27 Nov 2017 22:50:26 +0100
|
||||||
|
Subject: [PATCH] netfilter: move reroute indirection to struct nf_ipv6_ops
|
||||||
|
|
||||||
|
We cannot make a direct call to nf_ip6_reroute() because that would result
|
||||||
|
in autoloading the 'ipv6' module because of symbol dependencies.
|
||||||
|
Therefore, define reroute indirection in nf_ipv6_ops where this really
|
||||||
|
belongs.
|
||||||
|
|
||||||
|
For IPv4, we can indeed make a direct function call, which is faster,
|
||||||
|
given IPv4 is built into the networking code by default. Still,
|
||||||
|
CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define an empty inline
|
||||||
|
stub for IPv4 in such a case.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -311,8 +311,6 @@ struct nf_queue_entry;
|
||||||
|
|
||||||
|
struct nf_afinfo {
|
||||||
|
unsigned short family;
|
||||||
|
- int (*reroute)(struct net *net, struct sk_buff *skb,
|
||||||
|
- const struct nf_queue_entry *entry);
|
||||||
|
int route_key_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
@@ -331,6 +329,7 @@ __sum16 nf_checksum_partial(struct sk_bu
|
||||||
|
u_int8_t protocol, unsigned short family);
|
||||||
|
int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
||||||
|
bool strict, unsigned short family);
|
||||||
|
+int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
|
||||||
|
|
||||||
|
int nf_register_afinfo(const struct nf_afinfo *afinfo);
|
||||||
|
void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
|
||||||
|
--- a/include/linux/netfilter_ipv4.h
|
||||||
|
+++ b/include/linux/netfilter_ipv4.h
|
||||||
|
@@ -18,6 +18,8 @@ struct ip_rt_info {
|
||||||
|
|
||||||
|
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
|
||||||
|
|
||||||
|
+struct nf_queue_entry;
|
||||||
|
+
|
||||||
|
#ifdef CONFIG_INET
|
||||||
|
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol);
|
||||||
|
@@ -26,6 +28,7 @@ __sum16 nf_ip_checksum_partial(struct sk
|
||||||
|
u_int8_t protocol);
|
||||||
|
int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
||||||
|
bool strict);
|
||||||
|
+int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry);
|
||||||
|
#else
|
||||||
|
static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol)
|
||||||
|
@@ -45,6 +48,11 @@ static inline int nf_ip_route(struct net
|
||||||
|
{
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
}
|
||||||
|
+static inline int nf_ip_reroute(struct sk_buff *skb,
|
||||||
|
+ const struct nf_queue_entry *entry)
|
||||||
|
+{
|
||||||
|
+ return -EOPNOTSUPP;
|
||||||
|
+}
|
||||||
|
#endif /* CONFIG_INET */
|
||||||
|
|
||||||
|
#endif /*__LINUX_IP_NETFILTER_H*/
|
||||||
|
--- a/include/linux/netfilter_ipv6.h
|
||||||
|
+++ b/include/linux/netfilter_ipv6.h
|
||||||
|
@@ -18,6 +18,8 @@ struct ip6_rt_info {
|
||||||
|
u_int32_t mark;
|
||||||
|
};
|
||||||
|
|
||||||
|
+struct nf_queue_entry;
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Hook functions for ipv6 to allow xt_* modules to be built-in even
|
||||||
|
* if IPv6 is a module.
|
||||||
|
@@ -35,6 +37,7 @@ struct nf_ipv6_ops {
|
||||||
|
u_int8_t protocol);
|
||||||
|
int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
||||||
|
bool strict);
|
||||||
|
+ int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_NETFILTER
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -95,15 +95,8 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_BR_POST_ROUTING),
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int nf_br_reroute(struct net *net, struct sk_buff *skb,
|
||||||
|
- const struct nf_queue_entry *entry)
|
||||||
|
-{
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static const struct nf_afinfo nf_br_afinfo = {
|
||||||
|
.family = AF_BRIDGE,
|
||||||
|
- .reroute = nf_br_reroute,
|
||||||
|
.route_key_size = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter.c
|
||||||
|
+++ b/net/ipv4/netfilter.c
|
||||||
|
@@ -80,8 +80,7 @@ int ip_route_me_harder(struct net *net,
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(ip_route_me_harder);
|
||||||
|
|
||||||
|
-static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
|
||||||
|
- const struct nf_queue_entry *entry)
|
||||||
|
+int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
|
||||||
|
{
|
||||||
|
const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
|
||||||
|
|
||||||
|
@@ -92,10 +91,12 @@ static int nf_ip_reroute(struct net *net
|
||||||
|
skb->mark == rt_info->mark &&
|
||||||
|
iph->daddr == rt_info->daddr &&
|
||||||
|
iph->saddr == rt_info->saddr))
|
||||||
|
- return ip_route_me_harder(net, skb, RTN_UNSPEC);
|
||||||
|
+ return ip_route_me_harder(entry->state.net, skb,
|
||||||
|
+ RTN_UNSPEC);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_ip_reroute);
|
||||||
|
|
||||||
|
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol)
|
||||||
|
@@ -163,7 +164,6 @@ EXPORT_SYMBOL_GPL(nf_ip_route);
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip_afinfo = {
|
||||||
|
.family = AF_INET,
|
||||||
|
- .reroute = nf_ip_reroute,
|
||||||
|
.route_key_size = sizeof(struct ip_rt_info),
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/ipv6/netfilter.c
|
||||||
|
+++ b/net/ipv6/netfilter.c
|
||||||
|
@@ -69,7 +69,7 @@ int ip6_route_me_harder(struct net *net,
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(ip6_route_me_harder);
|
||||||
|
|
||||||
|
-static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
|
||||||
|
+static int nf_ip6_reroute(struct sk_buff *skb,
|
||||||
|
const struct nf_queue_entry *entry)
|
||||||
|
{
|
||||||
|
struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
|
||||||
|
@@ -79,7 +79,7 @@ static int nf_ip6_reroute(struct net *ne
|
||||||
|
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
|
||||||
|
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
|
||||||
|
skb->mark != rt_info->mark)
|
||||||
|
- return ip6_route_me_harder(net, skb);
|
||||||
|
+ return ip6_route_me_harder(entry->state.net, skb);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@@ -172,11 +172,11 @@ static const struct nf_ipv6_ops ipv6ops
|
||||||
|
.checksum = nf_ip6_checksum,
|
||||||
|
.checksum_partial = nf_ip6_checksum_partial,
|
||||||
|
.route = nf_ip6_route,
|
||||||
|
+ .reroute = nf_ip6_reroute,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip6_afinfo = {
|
||||||
|
.family = AF_INET6,
|
||||||
|
- .reroute = nf_ip6_reroute,
|
||||||
|
.route_key_size = sizeof(struct ip6_rt_info),
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_queue.c
|
||||||
|
+++ b/net/netfilter/nf_queue.c
|
||||||
|
@@ -266,7 +266,6 @@ void nf_reinject(struct nf_queue_entry *
|
||||||
|
const struct nf_hook_entry *hook_entry;
|
||||||
|
const struct nf_hook_entries *hooks;
|
||||||
|
struct sk_buff *skb = entry->skb;
|
||||||
|
- const struct nf_afinfo *afinfo;
|
||||||
|
const struct net *net;
|
||||||
|
unsigned int i;
|
||||||
|
int err;
|
||||||
|
@@ -293,8 +292,7 @@ void nf_reinject(struct nf_queue_entry *
|
||||||
|
verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
|
||||||
|
|
||||||
|
if (verdict == NF_ACCEPT) {
|
||||||
|
- afinfo = nf_get_afinfo(entry->state.pf);
|
||||||
|
- if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0)
|
||||||
|
+ if (nf_reroute(skb, entry) < 0)
|
||||||
|
verdict = NF_DROP;
|
||||||
|
}
|
||||||
|
|
||||||
|
--- a/net/netfilter/utils.c
|
||||||
|
+++ b/net/netfilter/utils.c
|
||||||
|
@@ -2,6 +2,7 @@
|
||||||
|
#include <linux/netfilter.h>
|
||||||
|
#include <linux/netfilter_ipv4.h>
|
||||||
|
#include <linux/netfilter_ipv6.h>
|
||||||
|
+#include <net/netfilter/nf_queue.h>
|
||||||
|
|
||||||
|
__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol,
|
||||||
|
@@ -69,3 +70,21 @@ int nf_route(struct net *net, struct dst
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_route);
|
||||||
|
+
|
||||||
|
+int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
|
||||||
|
+{
|
||||||
|
+ const struct nf_ipv6_ops *v6ops;
|
||||||
|
+ int ret = 0;
|
||||||
|
+
|
||||||
|
+ switch (entry->state.pf) {
|
||||||
|
+ case AF_INET:
|
||||||
|
+ ret = nf_ip_reroute(skb, entry);
|
||||||
|
+ break;
|
||||||
|
+ case AF_INET6:
|
||||||
|
+ v6ops = rcu_dereference(nf_ipv6_ops);
|
||||||
|
+ if (v6ops)
|
||||||
|
+ ret = v6ops->reroute(skb, entry);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ return ret;
|
||||||
|
+}
|
|
@ -0,0 +1,94 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Mon, 27 Nov 2017 22:58:37 +0100
|
||||||
|
Subject: [PATCH] netfilter: remove route_key_size field in struct nf_afinfo
|
||||||
|
|
||||||
|
This is only needed by nf_queue, place this code where it belongs.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -311,7 +311,6 @@ struct nf_queue_entry;
|
||||||
|
|
||||||
|
struct nf_afinfo {
|
||||||
|
unsigned short family;
|
||||||
|
- int route_key_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
|
||||||
|
--- a/net/ipv4/netfilter.c
|
||||||
|
+++ b/net/ipv4/netfilter.c
|
||||||
|
@@ -164,7 +164,6 @@ EXPORT_SYMBOL_GPL(nf_ip_route);
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip_afinfo = {
|
||||||
|
.family = AF_INET,
|
||||||
|
- .route_key_size = sizeof(struct ip_rt_info),
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init ipv4_netfilter_init(void)
|
||||||
|
--- a/net/ipv6/netfilter.c
|
||||||
|
+++ b/net/ipv6/netfilter.c
|
||||||
|
@@ -177,7 +177,6 @@ static const struct nf_ipv6_ops ipv6ops
|
||||||
|
|
||||||
|
static const struct nf_afinfo nf_ip6_afinfo = {
|
||||||
|
.family = AF_INET6,
|
||||||
|
- .route_key_size = sizeof(struct ip6_rt_info),
|
||||||
|
};
|
||||||
|
|
||||||
|
int __init ipv6_netfilter_init(void)
|
||||||
|
--- a/net/netfilter/nf_queue.c
|
||||||
|
+++ b/net/netfilter/nf_queue.c
|
||||||
|
@@ -15,6 +15,8 @@
|
||||||
|
#include <linux/netfilter_bridge.h>
|
||||||
|
#include <linux/seq_file.h>
|
||||||
|
#include <linux/rcupdate.h>
|
||||||
|
+#include <linux/netfilter_ipv4.h>
|
||||||
|
+#include <linux/netfilter_ipv6.h>
|
||||||
|
#include <net/protocol.h>
|
||||||
|
#include <net/netfilter/nf_queue.h>
|
||||||
|
#include <net/dst.h>
|
||||||
|
@@ -145,9 +147,9 @@ static int __nf_queue(struct sk_buff *sk
|
||||||
|
{
|
||||||
|
int status = -ENOENT;
|
||||||
|
struct nf_queue_entry *entry = NULL;
|
||||||
|
- const struct nf_afinfo *afinfo;
|
||||||
|
const struct nf_queue_handler *qh;
|
||||||
|
struct net *net = state->net;
|
||||||
|
+ unsigned int route_key_size;
|
||||||
|
|
||||||
|
/* QUEUE == DROP if no one is waiting, to be safe. */
|
||||||
|
qh = rcu_dereference(net->nf.queue_handler);
|
||||||
|
@@ -156,11 +158,19 @@ static int __nf_queue(struct sk_buff *sk
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
- afinfo = nf_get_afinfo(state->pf);
|
||||||
|
- if (!afinfo)
|
||||||
|
- goto err;
|
||||||
|
+ switch (state->pf) {
|
||||||
|
+ case AF_INET:
|
||||||
|
+ route_key_size = sizeof(struct ip_rt_info);
|
||||||
|
+ break;
|
||||||
|
+ case AF_INET6:
|
||||||
|
+ route_key_size = sizeof(struct ip6_rt_info);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ route_key_size = 0;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
|
||||||
|
+ entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
|
||||||
|
if (!entry) {
|
||||||
|
status = -ENOMEM;
|
||||||
|
goto err;
|
||||||
|
@@ -170,7 +180,7 @@ static int __nf_queue(struct sk_buff *sk
|
||||||
|
.skb = skb,
|
||||||
|
.state = *state,
|
||||||
|
.hook_index = index,
|
||||||
|
- .size = sizeof(*entry) + afinfo->route_key_size,
|
||||||
|
+ .size = sizeof(*entry) + route_key_size,
|
||||||
|
};
|
||||||
|
|
||||||
|
nf_queue_entry_get_refs(entry);
|
|
@ -0,0 +1,173 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sat, 9 Dec 2017 17:05:53 +0100
|
||||||
|
Subject: [PATCH] netfilter: remove struct nf_afinfo and its helper functions
|
||||||
|
|
||||||
|
This abstraction has no clients anymore, remove it.
|
||||||
|
|
||||||
|
This is what remains from previous authors, so correct the copyright
|
||||||
|
statement after recent modifications and code removal.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/linux/netfilter.h
|
||||||
|
+++ b/include/linux/netfilter.h
|
||||||
|
@@ -309,16 +309,6 @@ int skb_make_writable(struct sk_buff *sk
|
||||||
|
struct flowi;
|
||||||
|
struct nf_queue_entry;
|
||||||
|
|
||||||
|
-struct nf_afinfo {
|
||||||
|
- unsigned short family;
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
|
||||||
|
-static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
|
||||||
|
-{
|
||||||
|
- return rcu_dereference(nf_afinfo[family]);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
|
||||||
|
unsigned int dataoff, u_int8_t protocol,
|
||||||
|
unsigned short family);
|
||||||
|
@@ -330,9 +320,6 @@ int nf_route(struct net *net, struct dst
|
||||||
|
bool strict, unsigned short family);
|
||||||
|
int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
|
||||||
|
|
||||||
|
-int nf_register_afinfo(const struct nf_afinfo *afinfo);
|
||||||
|
-void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
|
||||||
|
-
|
||||||
|
#include <net/flow.h>
|
||||||
|
extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
|
||||||
|
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -95,30 +95,23 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_BR_POST_ROUTING),
|
||||||
|
};
|
||||||
|
|
||||||
|
-static const struct nf_afinfo nf_br_afinfo = {
|
||||||
|
- .family = AF_BRIDGE,
|
||||||
|
- .route_key_size = 0,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
static int __init nf_tables_bridge_init(void)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- nf_register_afinfo(&nf_br_afinfo);
|
||||||
|
ret = nft_register_chain_type(&filter_bridge);
|
||||||
|
if (ret < 0)
|
||||||
|
- goto err1;
|
||||||
|
+ return ret;
|
||||||
|
|
||||||
|
ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
|
||||||
|
if (ret < 0)
|
||||||
|
- goto err2;
|
||||||
|
+ goto err_register_subsys;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
-err2:
|
||||||
|
+err_register_subsys:
|
||||||
|
nft_unregister_chain_type(&filter_bridge);
|
||||||
|
-err1:
|
||||||
|
- nf_unregister_afinfo(&nf_br_afinfo);
|
||||||
|
+
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -126,7 +119,6 @@ static void __exit nf_tables_bridge_exit
|
||||||
|
{
|
||||||
|
unregister_pernet_subsys(&nf_tables_bridge_net_ops);
|
||||||
|
nft_unregister_chain_type(&filter_bridge);
|
||||||
|
- nf_unregister_afinfo(&nf_br_afinfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(nf_tables_bridge_init);
|
||||||
|
--- a/net/ipv4/netfilter.c
|
||||||
|
+++ b/net/ipv4/netfilter.c
|
||||||
|
@@ -161,13 +161,3 @@ int nf_ip_route(struct net *net, struct
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_ip_route);
|
||||||
|
-
|
||||||
|
-static const struct nf_afinfo nf_ip_afinfo = {
|
||||||
|
- .family = AF_INET,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-static int __init ipv4_netfilter_init(void)
|
||||||
|
-{
|
||||||
|
- return nf_register_afinfo(&nf_ip_afinfo);
|
||||||
|
-}
|
||||||
|
-subsys_initcall(ipv4_netfilter_init);
|
||||||
|
--- a/net/ipv6/netfilter.c
|
||||||
|
+++ b/net/ipv6/netfilter.c
|
||||||
|
@@ -175,14 +175,10 @@ static const struct nf_ipv6_ops ipv6ops
|
||||||
|
.reroute = nf_ip6_reroute,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static const struct nf_afinfo nf_ip6_afinfo = {
|
||||||
|
- .family = AF_INET6,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
int __init ipv6_netfilter_init(void)
|
||||||
|
{
|
||||||
|
RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
|
||||||
|
- return nf_register_afinfo(&nf_ip6_afinfo);
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This can be called from inet6_init() on errors, so it cannot
|
||||||
|
@@ -191,5 +187,4 @@ int __init ipv6_netfilter_init(void)
|
||||||
|
void ipv6_netfilter_fini(void)
|
||||||
|
{
|
||||||
|
RCU_INIT_POINTER(nf_ipv6_ops, NULL);
|
||||||
|
- nf_unregister_afinfo(&nf_ip6_afinfo);
|
||||||
|
}
|
||||||
|
--- a/net/netfilter/core.c
|
||||||
|
+++ b/net/netfilter/core.c
|
||||||
|
@@ -4,8 +4,7 @@
|
||||||
|
* Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
|
||||||
|
* way.
|
||||||
|
*
|
||||||
|
- * Rusty Russell (C)2000 -- This code is GPL.
|
||||||
|
- * Patrick McHardy (c) 2006-2012
|
||||||
|
+ * This code is GPL.
|
||||||
|
*/
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/netfilter.h>
|
||||||
|
@@ -28,34 +27,12 @@
|
||||||
|
|
||||||
|
#include "nf_internals.h"
|
||||||
|
|
||||||
|
-static DEFINE_MUTEX(afinfo_mutex);
|
||||||
|
-
|
||||||
|
-const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
|
||||||
|
-EXPORT_SYMBOL(nf_afinfo);
|
||||||
|
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
|
||||||
|
EXPORT_SYMBOL_GPL(nf_ipv6_ops);
|
||||||
|
|
||||||
|
DEFINE_PER_CPU(bool, nf_skb_duplicated);
|
||||||
|
EXPORT_SYMBOL_GPL(nf_skb_duplicated);
|
||||||
|
|
||||||
|
-int nf_register_afinfo(const struct nf_afinfo *afinfo)
|
||||||
|
-{
|
||||||
|
- mutex_lock(&afinfo_mutex);
|
||||||
|
- RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
|
||||||
|
- mutex_unlock(&afinfo_mutex);
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_register_afinfo);
|
||||||
|
-
|
||||||
|
-void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
|
||||||
|
-{
|
||||||
|
- mutex_lock(&afinfo_mutex);
|
||||||
|
- RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
|
||||||
|
- mutex_unlock(&afinfo_mutex);
|
||||||
|
- synchronize_rcu();
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
|
||||||
|
-
|
||||||
|
#ifdef HAVE_JUMP_LABEL
|
||||||
|
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
||||||
|
EXPORT_SYMBOL(nf_hooks_needed);
|
|
@ -0,0 +1,20 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sun, 10 Dec 2017 01:42:58 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables_arp: don't set forward chain
|
||||||
|
|
||||||
|
46928a0b49f3 ("netfilter: nf_tables: remove multihook chains and
|
||||||
|
families") already removed this, this is a leftover.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
@@ -34,7 +34,6 @@ static struct nft_af_info nft_af_arp __r
|
||||||
|
.hooks = {
|
||||||
|
[NF_ARP_IN] = nft_do_chain_arp,
|
||||||
|
[NF_ARP_OUT] = nft_do_chain_arp,
|
||||||
|
- [NF_ARP_FORWARD] = nft_do_chain_arp,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
|
@ -0,0 +1,233 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sat, 9 Dec 2017 15:43:17 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: remove hooks from family definition
|
||||||
|
|
||||||
|
They don't belong to the family definition, move them to the filter
|
||||||
|
chain type definition instead.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_tables.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables.h
|
||||||
|
@@ -875,7 +875,7 @@ enum nft_chain_type {
|
||||||
|
* @family: address family
|
||||||
|
* @owner: module owner
|
||||||
|
* @hook_mask: mask of valid hooks
|
||||||
|
- * @hooks: hookfn overrides
|
||||||
|
+ * @hooks: array of hook functions
|
||||||
|
*/
|
||||||
|
struct nf_chain_type {
|
||||||
|
const char *name;
|
||||||
|
@@ -969,7 +969,6 @@ enum nft_af_flags {
|
||||||
|
* @owner: module owner
|
||||||
|
* @tables: used internally
|
||||||
|
* @flags: family flags
|
||||||
|
- * @hooks: hookfn overrides for packet validation
|
||||||
|
*/
|
||||||
|
struct nft_af_info {
|
||||||
|
struct list_head list;
|
||||||
|
@@ -978,7 +977,6 @@ struct nft_af_info {
|
||||||
|
struct module *owner;
|
||||||
|
struct list_head tables;
|
||||||
|
u32 flags;
|
||||||
|
- nf_hookfn *hooks[NF_MAX_HOOKS];
|
||||||
|
};
|
||||||
|
|
||||||
|
int nft_register_afinfo(struct net *, struct nft_af_info *);
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -46,13 +46,6 @@ static struct nft_af_info nft_af_bridge
|
||||||
|
.family = NFPROTO_BRIDGE,
|
||||||
|
.nhooks = NF_BR_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .hooks = {
|
||||||
|
- [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
|
||||||
|
- [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
|
||||||
|
- [NF_BR_FORWARD] = nft_do_chain_bridge,
|
||||||
|
- [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
|
||||||
|
- [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
|
||||||
|
- },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int nf_tables_bridge_init_net(struct net *net)
|
||||||
|
@@ -93,6 +86,13 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_BR_FORWARD) |
|
||||||
|
(1 << NF_BR_LOCAL_OUT) |
|
||||||
|
(1 << NF_BR_POST_ROUTING),
|
||||||
|
+ .hooks = {
|
||||||
|
+ [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
|
||||||
|
+ [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
|
||||||
|
+ [NF_BR_FORWARD] = nft_do_chain_bridge,
|
||||||
|
+ [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
|
||||||
|
+ [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init nf_tables_bridge_init(void)
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
@@ -31,10 +31,6 @@ static struct nft_af_info nft_af_arp __r
|
||||||
|
.family = NFPROTO_ARP,
|
||||||
|
.nhooks = NF_ARP_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .hooks = {
|
||||||
|
- [NF_ARP_IN] = nft_do_chain_arp,
|
||||||
|
- [NF_ARP_OUT] = nft_do_chain_arp,
|
||||||
|
- },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int nf_tables_arp_init_net(struct net *net)
|
||||||
|
@@ -72,6 +68,10 @@ static const struct nf_chain_type filter
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.hook_mask = (1 << NF_ARP_IN) |
|
||||||
|
(1 << NF_ARP_OUT),
|
||||||
|
+ .hooks = {
|
||||||
|
+ [NF_ARP_IN] = nft_do_chain_arp,
|
||||||
|
+ [NF_ARP_OUT] = nft_do_chain_arp,
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init nf_tables_arp_init(void)
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
@@ -49,13 +49,6 @@ static struct nft_af_info nft_af_ipv4 __
|
||||||
|
.family = NFPROTO_IPV4,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .hooks = {
|
||||||
|
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
|
||||||
|
- [NF_INET_LOCAL_OUT] = nft_ipv4_output,
|
||||||
|
- [NF_INET_FORWARD] = nft_do_chain_ipv4,
|
||||||
|
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
|
||||||
|
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
|
||||||
|
- },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int nf_tables_ipv4_init_net(struct net *net)
|
||||||
|
@@ -96,6 +89,13 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_INET_FORWARD) |
|
||||||
|
(1 << NF_INET_PRE_ROUTING) |
|
||||||
|
(1 << NF_INET_POST_ROUTING),
|
||||||
|
+ .hooks = {
|
||||||
|
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
|
||||||
|
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
|
||||||
|
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
|
||||||
|
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
|
||||||
|
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init nf_tables_ipv4_init(void)
|
||||||
|
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
@@ -46,13 +46,6 @@ static struct nft_af_info nft_af_ipv6 __
|
||||||
|
.family = NFPROTO_IPV6,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .hooks = {
|
||||||
|
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
|
||||||
|
- [NF_INET_LOCAL_OUT] = nft_ipv6_output,
|
||||||
|
- [NF_INET_FORWARD] = nft_do_chain_ipv6,
|
||||||
|
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
|
||||||
|
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
|
||||||
|
- },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int nf_tables_ipv6_init_net(struct net *net)
|
||||||
|
@@ -93,6 +86,13 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_INET_FORWARD) |
|
||||||
|
(1 << NF_INET_PRE_ROUTING) |
|
||||||
|
(1 << NF_INET_POST_ROUTING),
|
||||||
|
+ .hooks = {
|
||||||
|
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
|
||||||
|
+ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
|
||||||
|
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
|
||||||
|
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
|
||||||
|
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init nf_tables_ipv6_init(void)
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -1383,7 +1383,6 @@ static int nf_tables_addchain(struct nft
|
||||||
|
if (nla[NFTA_CHAIN_HOOK]) {
|
||||||
|
struct nft_chain_hook hook;
|
||||||
|
struct nf_hook_ops *ops;
|
||||||
|
- nf_hookfn *hookfn;
|
||||||
|
|
||||||
|
err = nft_chain_parse_hook(net, nla, afi, &hook, create);
|
||||||
|
if (err < 0)
|
||||||
|
@@ -1409,7 +1408,6 @@ static int nf_tables_addchain(struct nft
|
||||||
|
static_branch_inc(&nft_counters_enabled);
|
||||||
|
}
|
||||||
|
|
||||||
|
- hookfn = hook.type->hooks[hook.num];
|
||||||
|
basechain->type = hook.type;
|
||||||
|
chain = &basechain->chain;
|
||||||
|
|
||||||
|
@@ -1418,10 +1416,8 @@ static int nf_tables_addchain(struct nft
|
||||||
|
ops->hooknum = hook.num;
|
||||||
|
ops->priority = hook.priority;
|
||||||
|
ops->priv = chain;
|
||||||
|
- ops->hook = afi->hooks[ops->hooknum];
|
||||||
|
+ ops->hook = hook.type->hooks[ops->hooknum];
|
||||||
|
ops->dev = hook.dev;
|
||||||
|
- if (hookfn)
|
||||||
|
- ops->hook = hookfn;
|
||||||
|
|
||||||
|
if (basechain->type->type == NFT_CHAIN_T_NAT)
|
||||||
|
ops->nat_hook = true;
|
||||||
|
--- a/net/netfilter/nf_tables_inet.c
|
||||||
|
+++ b/net/netfilter/nf_tables_inet.c
|
||||||
|
@@ -74,13 +74,6 @@ static struct nft_af_info nft_af_inet __
|
||||||
|
.family = NFPROTO_INET,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .hooks = {
|
||||||
|
- [NF_INET_LOCAL_IN] = nft_do_chain_inet,
|
||||||
|
- [NF_INET_LOCAL_OUT] = nft_inet_output,
|
||||||
|
- [NF_INET_FORWARD] = nft_do_chain_inet,
|
||||||
|
- [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
|
||||||
|
- [NF_INET_POST_ROUTING] = nft_do_chain_inet,
|
||||||
|
- },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __net_init nf_tables_inet_init_net(struct net *net)
|
||||||
|
@@ -121,6 +114,13 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_INET_FORWARD) |
|
||||||
|
(1 << NF_INET_PRE_ROUTING) |
|
||||||
|
(1 << NF_INET_POST_ROUTING),
|
||||||
|
+ .hooks = {
|
||||||
|
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
|
||||||
|
+ [NF_INET_LOCAL_OUT] = nft_inet_output,
|
||||||
|
+ [NF_INET_FORWARD] = nft_do_chain_inet,
|
||||||
|
+ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
|
||||||
|
+ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init nf_tables_inet_init(void)
|
||||||
|
--- a/net/netfilter/nf_tables_netdev.c
|
||||||
|
+++ b/net/netfilter/nf_tables_netdev.c
|
||||||
|
@@ -43,9 +43,6 @@ static struct nft_af_info nft_af_netdev
|
||||||
|
.nhooks = NF_NETDEV_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.flags = NFT_AF_NEEDS_DEV,
|
||||||
|
- .hooks = {
|
||||||
|
- [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
|
||||||
|
- },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int nf_tables_netdev_init_net(struct net *net)
|
||||||
|
@@ -82,6 +79,9 @@ static const struct nf_chain_type nft_fi
|
||||||
|
.family = NFPROTO_NETDEV,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.hook_mask = (1 << NF_NETDEV_INGRESS),
|
||||||
|
+ .hooks = {
|
||||||
|
+ [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
|
||||||
|
+ },
|
||||||
|
};
|
||||||
|
|
||||||
|
static void nft_netdev_event(unsigned long event, struct net_device *dev,
|
|
@ -0,0 +1,302 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sat, 30 Dec 2017 22:41:46 +0100
|
||||||
|
Subject: [PATCH] netfilter: remove defensive check on malformed packets from
|
||||||
|
raw sockets
|
||||||
|
|
||||||
|
Users cannot forge malformed IPv4/IPv6 headers via raw sockets that they
|
||||||
|
can inject into the stack. Specifically, not for IPv4 since 55888dfb6ba7
|
||||||
|
("AF_RAW: Augment raw_send_hdrinc to expand skb to fit iphdr->ihl
|
||||||
|
(v2)"). IPv6 raw sockets also ensure that packets have a well-formed
|
||||||
|
IPv6 header available in the skbuff.
|
||||||
|
|
||||||
|
At a quick glance, br_netfilter also validates layer 3 headers and it
|
||||||
|
drops both malformed IPv4 and IPv6 packets.
|
||||||
|
|
||||||
|
Therefore, let's remove this defensive check all over the place.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/iptable_filter.c
|
||||||
|
+++ b/net/ipv4/netfilter/iptable_filter.c
|
||||||
|
@@ -38,12 +38,6 @@ static unsigned int
|
||||||
|
iptable_filter_hook(void *priv, struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
{
|
||||||
|
- if (state->hook == NF_INET_LOCAL_OUT &&
|
||||||
|
- (skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdrlen(skb) < sizeof(struct iphdr)))
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
|
||||||
|
}
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/iptable_mangle.c
|
||||||
|
+++ b/net/ipv4/netfilter/iptable_mangle.c
|
||||||
|
@@ -49,11 +49,6 @@ ipt_mangle_out(struct sk_buff *skb, cons
|
||||||
|
u_int32_t mark;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- if (skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdrlen(skb) < sizeof(struct iphdr))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
/* Save things which could affect route */
|
||||||
|
mark = skb->mark;
|
||||||
|
iph = ip_hdr(skb);
|
||||||
|
--- a/net/ipv4/netfilter/iptable_raw.c
|
||||||
|
+++ b/net/ipv4/netfilter/iptable_raw.c
|
||||||
|
@@ -26,12 +26,6 @@ static unsigned int
|
||||||
|
iptable_raw_hook(void *priv, struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
{
|
||||||
|
- if (state->hook == NF_INET_LOCAL_OUT &&
|
||||||
|
- (skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdrlen(skb) < sizeof(struct iphdr)))
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
|
||||||
|
}
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/iptable_security.c
|
||||||
|
+++ b/net/ipv4/netfilter/iptable_security.c
|
||||||
|
@@ -43,12 +43,6 @@ static unsigned int
|
||||||
|
iptable_security_hook(void *priv, struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
{
|
||||||
|
- if (state->hook == NF_INET_LOCAL_OUT &&
|
||||||
|
- (skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdrlen(skb) < sizeof(struct iphdr)))
|
||||||
|
- /* Somebody is playing with raw sockets. */
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
|
||||||
|
}
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
|
||||||
|
@@ -154,11 +154,6 @@ static unsigned int ipv4_conntrack_local
|
||||||
|
struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
{
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- if (skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdrlen(skb) < sizeof(struct iphdr))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
|
||||||
|
enum ip_conntrack_info ctinfo;
|
||||||
|
struct nf_conn *tmpl;
|
||||||
|
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
|
||||||
|
@@ -355,11 +355,6 @@ nf_nat_ipv4_out(void *priv, struct sk_bu
|
||||||
|
#endif
|
||||||
|
unsigned int ret;
|
||||||
|
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- if (skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdrlen(skb) < sizeof(struct iphdr))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
|
||||||
|
#ifdef CONFIG_XFRM
|
||||||
|
if (ret != NF_DROP && ret != NF_STOLEN &&
|
||||||
|
@@ -395,11 +390,6 @@ nf_nat_ipv4_local_fn(void *priv, struct
|
||||||
|
unsigned int ret;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- if (skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdrlen(skb) < sizeof(struct iphdr))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
|
||||||
|
if (ret != NF_DROP && ret != NF_STOLEN &&
|
||||||
|
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
@@ -30,21 +30,6 @@ static unsigned int nft_do_chain_ipv4(vo
|
||||||
|
return nft_do_chain(&pkt, priv);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static unsigned int nft_ipv4_output(void *priv,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
-{
|
||||||
|
- if (unlikely(skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
|
||||||
|
- if (net_ratelimit())
|
||||||
|
- pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
|
||||||
|
- "packet\n");
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return nft_do_chain_ipv4(priv, skb, state);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static struct nft_af_info nft_af_ipv4 __read_mostly = {
|
||||||
|
.family = NFPROTO_IPV4,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
@@ -91,7 +76,7 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_INET_POST_ROUTING),
|
||||||
|
.hooks = {
|
||||||
|
[NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
|
||||||
|
- [NF_INET_LOCAL_OUT] = nft_ipv4_output,
|
||||||
|
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
|
||||||
|
[NF_INET_FORWARD] = nft_do_chain_ipv4,
|
||||||
|
[NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
|
||||||
|
[NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
|
||||||
|
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
|
||||||
|
@@ -33,11 +33,6 @@ static unsigned int nf_route_table_hook(
|
||||||
|
const struct iphdr *iph;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- if (skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdrlen(skb) < sizeof(struct iphdr))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
nft_set_pktinfo_ipv4(&pkt, skb);
|
||||||
|
|
||||||
|
--- a/net/ipv6/netfilter/ip6table_mangle.c
|
||||||
|
+++ b/net/ipv6/netfilter/ip6table_mangle.c
|
||||||
|
@@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, con
|
||||||
|
u_int8_t hop_limit;
|
||||||
|
u_int32_t flowlabel, mark;
|
||||||
|
int err;
|
||||||
|
-#if 0
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- if (skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdrlen(skb) < sizeof(struct iphdr)) {
|
||||||
|
- net_warn_ratelimited("ip6t_hook: happy cracking\n");
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
- }
|
||||||
|
-#endif
|
||||||
|
|
||||||
|
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
|
||||||
|
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
|
||||||
|
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
|
||||||
|
@@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local
|
||||||
|
struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
{
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- if (skb->len < sizeof(struct ipv6hdr)) {
|
||||||
|
- net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
- }
|
||||||
|
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
|
||||||
|
@@ -372,10 +372,6 @@ nf_nat_ipv6_out(void *priv, struct sk_bu
|
||||||
|
#endif
|
||||||
|
unsigned int ret;
|
||||||
|
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- if (skb->len < sizeof(struct ipv6hdr))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
|
||||||
|
#ifdef CONFIG_XFRM
|
||||||
|
if (ret != NF_DROP && ret != NF_STOLEN &&
|
||||||
|
@@ -411,10 +407,6 @@ nf_nat_ipv6_local_fn(void *priv, struct
|
||||||
|
unsigned int ret;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
- /* root is playing with raw sockets. */
|
||||||
|
- if (skb->len < sizeof(struct ipv6hdr))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
|
||||||
|
if (ret != NF_DROP && ret != NF_STOLEN &&
|
||||||
|
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
|
||||||
|
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
@@ -28,20 +28,6 @@ static unsigned int nft_do_chain_ipv6(vo
|
||||||
|
return nft_do_chain(&pkt, priv);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static unsigned int nft_ipv6_output(void *priv,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
-{
|
||||||
|
- if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
|
||||||
|
- if (net_ratelimit())
|
||||||
|
- pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
|
||||||
|
- "packet\n");
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return nft_do_chain_ipv6(priv, skb, state);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static struct nft_af_info nft_af_ipv6 __read_mostly = {
|
||||||
|
.family = NFPROTO_IPV6,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
@@ -88,7 +74,7 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_INET_POST_ROUTING),
|
||||||
|
.hooks = {
|
||||||
|
[NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
|
||||||
|
- [NF_INET_LOCAL_OUT] = nft_ipv6_output,
|
||||||
|
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
|
||||||
|
[NF_INET_FORWARD] = nft_do_chain_ipv6,
|
||||||
|
[NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
|
||||||
|
[NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
|
||||||
|
--- a/net/netfilter/nf_tables_inet.c
|
||||||
|
+++ b/net/netfilter/nf_tables_inet.c
|
||||||
|
@@ -38,38 +38,6 @@ static unsigned int nft_do_chain_inet(vo
|
||||||
|
return nft_do_chain(&pkt, priv);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static unsigned int nft_inet_output(void *priv, struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
-{
|
||||||
|
- struct nft_pktinfo pkt;
|
||||||
|
-
|
||||||
|
- nft_set_pktinfo(&pkt, skb, state);
|
||||||
|
-
|
||||||
|
- switch (state->pf) {
|
||||||
|
- case NFPROTO_IPV4:
|
||||||
|
- if (unlikely(skb->len < sizeof(struct iphdr) ||
|
||||||
|
- ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
|
||||||
|
- if (net_ratelimit())
|
||||||
|
- pr_info("ignoring short SOCK_RAW packet\n");
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
- }
|
||||||
|
- nft_set_pktinfo_ipv4(&pkt, skb);
|
||||||
|
- break;
|
||||||
|
- case NFPROTO_IPV6:
|
||||||
|
- if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
|
||||||
|
- if (net_ratelimit())
|
||||||
|
- pr_info("ignoring short SOCK_RAW packet\n");
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
- }
|
||||||
|
- nft_set_pktinfo_ipv6(&pkt, skb);
|
||||||
|
- break;
|
||||||
|
- default:
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return nft_do_chain(&pkt, priv);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static struct nft_af_info nft_af_inet __read_mostly = {
|
||||||
|
.family = NFPROTO_INET,
|
||||||
|
.nhooks = NF_INET_NUMHOOKS,
|
||||||
|
@@ -116,7 +84,7 @@ static const struct nf_chain_type filter
|
||||||
|
(1 << NF_INET_POST_ROUTING),
|
||||||
|
.hooks = {
|
||||||
|
[NF_INET_LOCAL_IN] = nft_do_chain_inet,
|
||||||
|
- [NF_INET_LOCAL_OUT] = nft_inet_output,
|
||||||
|
+ [NF_INET_LOCAL_OUT] = nft_do_chain_inet,
|
||||||
|
[NF_INET_FORWARD] = nft_do_chain_inet,
|
||||||
|
[NF_INET_PRE_ROUTING] = nft_do_chain_inet,
|
||||||
|
[NF_INET_POST_ROUTING] = nft_do_chain_inet,
|
|
@ -0,0 +1,101 @@
|
||||||
|
From: Florian Westphal <fw@strlen.de>
|
||||||
|
Date: Wed, 6 Dec 2017 16:18:16 +0100
|
||||||
|
Subject: [PATCH] netfilter: meta: secpath support
|
||||||
|
|
||||||
|
replacement for iptables "-m policy --dir in --policy {ipsec,none}".
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/uapi/linux/netfilter/nf_tables.h
|
||||||
|
+++ b/include/uapi/linux/netfilter/nf_tables.h
|
||||||
|
@@ -777,6 +777,7 @@ enum nft_exthdr_attributes {
|
||||||
|
* @NFT_META_OIFGROUP: packet output interface group
|
||||||
|
* @NFT_META_CGROUP: socket control group (skb->sk->sk_classid)
|
||||||
|
* @NFT_META_PRANDOM: a 32bit pseudo-random number
|
||||||
|
+ * @NFT_META_SECPATH: boolean, secpath_exists (!!skb->sp)
|
||||||
|
*/
|
||||||
|
enum nft_meta_keys {
|
||||||
|
NFT_META_LEN,
|
||||||
|
@@ -804,6 +805,7 @@ enum nft_meta_keys {
|
||||||
|
NFT_META_OIFGROUP,
|
||||||
|
NFT_META_CGROUP,
|
||||||
|
NFT_META_PRANDOM,
|
||||||
|
+ NFT_META_SECPATH,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
--- a/net/netfilter/nft_meta.c
|
||||||
|
+++ b/net/netfilter/nft_meta.c
|
||||||
|
@@ -210,6 +210,11 @@ void nft_meta_get_eval(const struct nft_
|
||||||
|
*dest = prandom_u32_state(state);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
+#ifdef CONFIG_XFRM
|
||||||
|
+ case NFT_META_SECPATH:
|
||||||
|
+ nft_reg_store8(dest, !!skb->sp);
|
||||||
|
+ break;
|
||||||
|
+#endif
|
||||||
|
default:
|
||||||
|
WARN_ON(1);
|
||||||
|
goto err;
|
||||||
|
@@ -310,6 +315,11 @@ int nft_meta_get_init(const struct nft_c
|
||||||
|
prandom_init_once(&nft_prandom_state);
|
||||||
|
len = sizeof(u32);
|
||||||
|
break;
|
||||||
|
+#ifdef CONFIG_XFRM
|
||||||
|
+ case NFT_META_SECPATH:
|
||||||
|
+ len = sizeof(u8);
|
||||||
|
+ break;
|
||||||
|
+#endif
|
||||||
|
default:
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
}
|
||||||
|
@@ -320,6 +330,38 @@ int nft_meta_get_init(const struct nft_c
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nft_meta_get_init);
|
||||||
|
|
||||||
|
+static int nft_meta_get_validate(const struct nft_ctx *ctx,
|
||||||
|
+ const struct nft_expr *expr,
|
||||||
|
+ const struct nft_data **data)
|
||||||
|
+{
|
||||||
|
+#ifdef CONFIG_XFRM
|
||||||
|
+ const struct nft_meta *priv = nft_expr_priv(expr);
|
||||||
|
+ unsigned int hooks;
|
||||||
|
+
|
||||||
|
+ if (priv->key != NFT_META_SECPATH)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ switch (ctx->afi->family) {
|
||||||
|
+ case NFPROTO_NETDEV:
|
||||||
|
+ hooks = 1 << NF_NETDEV_INGRESS;
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ case NFPROTO_INET:
|
||||||
|
+ hooks = (1 << NF_INET_PRE_ROUTING) |
|
||||||
|
+ (1 << NF_INET_LOCAL_IN) |
|
||||||
|
+ (1 << NF_INET_FORWARD);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -EOPNOTSUPP;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nft_chain_validate_hooks(ctx->chain, hooks);
|
||||||
|
+#else
|
||||||
|
+ return 0;
|
||||||
|
+#endif
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int nft_meta_set_validate(const struct nft_ctx *ctx,
|
||||||
|
const struct nft_expr *expr,
|
||||||
|
const struct nft_data **data)
|
||||||
|
@@ -436,6 +478,7 @@ static const struct nft_expr_ops nft_met
|
||||||
|
.eval = nft_meta_get_eval,
|
||||||
|
.init = nft_meta_get_init,
|
||||||
|
.dump = nft_meta_get_dump,
|
||||||
|
+ .validate = nft_meta_get_validate,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct nft_expr_ops nft_meta_set_ops = {
|
|
@ -0,0 +1,142 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Fri, 3 Nov 2017 16:26:32 +0100
|
||||||
|
Subject: [PATCH] netfilter: conntrack: move nf_ct_netns_{get,put}() to core
|
||||||
|
|
||||||
|
So we can call this from other expressions that need conntrack in place
|
||||||
|
to work.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Acked-by: Florian Westphal <fw@strlen.de>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_conntrack_proto.c
|
||||||
|
+++ b/net/netfilter/nf_conntrack_proto.c
|
||||||
|
@@ -125,7 +125,7 @@ void nf_ct_l3proto_module_put(unsigned s
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
|
||||||
|
|
||||||
|
-int nf_ct_netns_get(struct net *net, u8 nfproto)
|
||||||
|
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
|
||||||
|
{
|
||||||
|
const struct nf_conntrack_l3proto *l3proto;
|
||||||
|
int ret;
|
||||||
|
@@ -150,9 +150,33 @@ int nf_ct_netns_get(struct net *net, u8
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+int nf_ct_netns_get(struct net *net, u8 nfproto)
|
||||||
|
+{
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ if (nfproto == NFPROTO_INET) {
|
||||||
|
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
|
||||||
|
+ if (err < 0)
|
||||||
|
+ goto err1;
|
||||||
|
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
|
||||||
|
+ if (err < 0)
|
||||||
|
+ goto err2;
|
||||||
|
+ } else {
|
||||||
|
+ err = nf_ct_netns_do_get(net, nfproto);
|
||||||
|
+ if (err < 0)
|
||||||
|
+ goto err1;
|
||||||
|
+ }
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+err2:
|
||||||
|
+ nf_ct_netns_put(net, NFPROTO_IPV4);
|
||||||
|
+err1:
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
|
||||||
|
|
||||||
|
-void nf_ct_netns_put(struct net *net, u8 nfproto)
|
||||||
|
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
|
||||||
|
{
|
||||||
|
const struct nf_conntrack_l3proto *l3proto;
|
||||||
|
|
||||||
|
@@ -171,6 +195,15 @@ void nf_ct_netns_put(struct net *net, u8
|
||||||
|
|
||||||
|
nf_ct_l3proto_module_put(nfproto);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
|
||||||
|
+{
|
||||||
|
+ if (nfproto == NFPROTO_INET) {
|
||||||
|
+ nf_ct_netns_do_put(net, NFPROTO_IPV4);
|
||||||
|
+ nf_ct_netns_do_put(net, NFPROTO_IPV6);
|
||||||
|
+ } else
|
||||||
|
+ nf_ct_netns_do_put(net, nfproto);
|
||||||
|
+}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
|
||||||
|
|
||||||
|
const struct nf_conntrack_l4proto *
|
||||||
|
--- a/net/netfilter/nft_ct.c
|
||||||
|
+++ b/net/netfilter/nft_ct.c
|
||||||
|
@@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_po
|
||||||
|
[NFTA_CT_SREG] = { .type = NLA_U32 },
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int nft_ct_netns_get(struct net *net, uint8_t family)
|
||||||
|
-{
|
||||||
|
- int err;
|
||||||
|
-
|
||||||
|
- if (family == NFPROTO_INET) {
|
||||||
|
- err = nf_ct_netns_get(net, NFPROTO_IPV4);
|
||||||
|
- if (err < 0)
|
||||||
|
- goto err1;
|
||||||
|
- err = nf_ct_netns_get(net, NFPROTO_IPV6);
|
||||||
|
- if (err < 0)
|
||||||
|
- goto err2;
|
||||||
|
- } else {
|
||||||
|
- err = nf_ct_netns_get(net, family);
|
||||||
|
- if (err < 0)
|
||||||
|
- goto err1;
|
||||||
|
- }
|
||||||
|
- return 0;
|
||||||
|
-
|
||||||
|
-err2:
|
||||||
|
- nf_ct_netns_put(net, NFPROTO_IPV4);
|
||||||
|
-err1:
|
||||||
|
- return err;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void nft_ct_netns_put(struct net *net, uint8_t family)
|
||||||
|
-{
|
||||||
|
- if (family == NFPROTO_INET) {
|
||||||
|
- nf_ct_netns_put(net, NFPROTO_IPV4);
|
||||||
|
- nf_ct_netns_put(net, NFPROTO_IPV6);
|
||||||
|
- } else
|
||||||
|
- nf_ct_netns_put(net, family);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
#ifdef CONFIG_NF_CONNTRACK_ZONES
|
||||||
|
static void nft_ct_tmpl_put_pcpu(void)
|
||||||
|
{
|
||||||
|
@@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct
|
||||||
|
if (err < 0)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
|
||||||
|
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
|
||||||
|
if (err < 0)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
@@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct
|
||||||
|
if (err < 0)
|
||||||
|
goto err1;
|
||||||
|
|
||||||
|
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
|
||||||
|
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
|
||||||
|
if (err < 0)
|
||||||
|
goto err1;
|
||||||
|
|
||||||
|
@@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const str
|
||||||
|
struct nft_ct *priv = nft_expr_priv(expr);
|
||||||
|
|
||||||
|
__nft_ct_set_destroy(ctx, priv);
|
||||||
|
- nft_ct_netns_put(ctx->net, ctx->afi->family);
|
||||||
|
+ nf_ct_netns_put(ctx->net, ctx->afi->family);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
|
|
@ -0,0 +1,169 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sun, 7 Jan 2018 01:03:56 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_conntrack: add IPS_OFFLOAD status bit
|
||||||
|
|
||||||
|
This new bit tells us that the conntrack entry is owned by the flow
|
||||||
|
table offload infrastructure.
|
||||||
|
|
||||||
|
# cat /proc/net/nf_conntrack
|
||||||
|
ipv4 2 tcp 6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2
|
||||||
|
|
||||||
|
Note the [OFFLOAD] tag in the listing.
|
||||||
|
|
||||||
|
The timer of such conntrack entries looks stopped from userspace.
|
||||||
|
In practice, to make sure the conntrack entry does not go away, the
|
||||||
|
conntrack timer is periodically set to an arbitrary large value that
|
||||||
|
gets refreshed on every iteration from the garbage collector, so it
|
||||||
|
never expires- and they display no internal state in the case of TCP
|
||||||
|
flows. This allows us to save a bitcheck from the packet path via
|
||||||
|
nf_ct_is_expired().
|
||||||
|
|
||||||
|
Conntrack entries that have been offloaded to the flow table
|
||||||
|
infrastructure cannot be deleted/flushed via ctnetlink. The flow table
|
||||||
|
infrastructure is also responsible for releasing this conntrack entry.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
|
||||||
|
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
|
||||||
|
@@ -101,12 +101,16 @@ enum ip_conntrack_status {
|
||||||
|
IPS_HELPER_BIT = 13,
|
||||||
|
IPS_HELPER = (1 << IPS_HELPER_BIT),
|
||||||
|
|
||||||
|
+ /* Conntrack has been offloaded to flow table. */
|
||||||
|
+ IPS_OFFLOAD_BIT = 14,
|
||||||
|
+ IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
|
||||||
|
+
|
||||||
|
/* Be careful here, modifying these bits can make things messy,
|
||||||
|
* so don't let users modify them directly.
|
||||||
|
*/
|
||||||
|
IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
|
||||||
|
IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
|
||||||
|
- IPS_SEQ_ADJUST | IPS_TEMPLATE),
|
||||||
|
+ IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
|
||||||
|
|
||||||
|
__IPS_MAX_BIT = 14,
|
||||||
|
};
|
||||||
|
--- a/net/netfilter/nf_conntrack_core.c
|
||||||
|
+++ b/net/netfilter/nf_conntrack_core.c
|
||||||
|
@@ -901,6 +901,9 @@ static unsigned int early_drop_list(stru
|
||||||
|
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
|
||||||
|
tmp = nf_ct_tuplehash_to_ctrack(h);
|
||||||
|
|
||||||
|
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
if (nf_ct_is_expired(tmp)) {
|
||||||
|
nf_ct_gc_expired(tmp);
|
||||||
|
continue;
|
||||||
|
@@ -978,6 +981,18 @@ static bool gc_worker_can_early_drop(con
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
+#define DAY (86400 * HZ)
|
||||||
|
+
|
||||||
|
+/* Set an arbitrary timeout large enough not to ever expire, this save
|
||||||
|
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
|
||||||
|
+ * nf_ct_is_expired().
|
||||||
|
+ */
|
||||||
|
+static void nf_ct_offload_timeout(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ if (nf_ct_expires(ct) < DAY / 2)
|
||||||
|
+ ct->timeout = nfct_time_stamp + DAY;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void gc_worker(struct work_struct *work)
|
||||||
|
{
|
||||||
|
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
|
||||||
|
@@ -1014,6 +1029,11 @@ static void gc_worker(struct work_struct
|
||||||
|
tmp = nf_ct_tuplehash_to_ctrack(h);
|
||||||
|
|
||||||
|
scanned++;
|
||||||
|
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
|
||||||
|
+ nf_ct_offload_timeout(tmp);
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (nf_ct_is_expired(tmp)) {
|
||||||
|
nf_ct_gc_expired(tmp);
|
||||||
|
expired_count++;
|
||||||
|
--- a/net/netfilter/nf_conntrack_netlink.c
|
||||||
|
+++ b/net/netfilter/nf_conntrack_netlink.c
|
||||||
|
@@ -1120,6 +1120,14 @@ static const struct nla_policy ct_nla_po
|
||||||
|
.len = NF_CT_LABELS_MAX_SIZE },
|
||||||
|
};
|
||||||
|
|
||||||
|
+static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
|
||||||
|
+{
|
||||||
|
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ return ctnetlink_filter_match(ct, data);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int ctnetlink_flush_conntrack(struct net *net,
|
||||||
|
const struct nlattr * const cda[],
|
||||||
|
u32 portid, int report)
|
||||||
|
@@ -1132,7 +1140,7 @@ static int ctnetlink_flush_conntrack(str
|
||||||
|
return PTR_ERR(filter);
|
||||||
|
}
|
||||||
|
|
||||||
|
- nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
|
||||||
|
+ nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
|
||||||
|
portid, report);
|
||||||
|
kfree(filter);
|
||||||
|
|
||||||
|
@@ -1178,6 +1186,11 @@ static int ctnetlink_del_conntrack(struc
|
||||||
|
|
||||||
|
ct = nf_ct_tuplehash_to_ctrack(h);
|
||||||
|
|
||||||
|
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
|
||||||
|
+ nf_ct_put(ct);
|
||||||
|
+ return -EBUSY;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (cda[CTA_ID]) {
|
||||||
|
u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
|
||||||
|
if (id != (u32)(unsigned long)ct) {
|
||||||
|
--- a/net/netfilter/nf_conntrack_proto_tcp.c
|
||||||
|
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
|
||||||
|
@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_c
|
||||||
|
/* Print out the private part of the conntrack. */
|
||||||
|
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
|
||||||
|
{
|
||||||
|
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
--- a/net/netfilter/nf_conntrack_standalone.c
|
||||||
|
+++ b/net/netfilter/nf_conntrack_standalone.c
|
||||||
|
@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *
|
||||||
|
WARN_ON(!l4proto);
|
||||||
|
|
||||||
|
ret = -ENOSPC;
|
||||||
|
- seq_printf(s, "%-8s %u %-8s %u %ld ",
|
||||||
|
+ seq_printf(s, "%-8s %u %-8s %u ",
|
||||||
|
l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
|
||||||
|
- l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
|
||||||
|
- nf_ct_expires(ct) / HZ);
|
||||||
|
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
|
||||||
|
+
|
||||||
|
+ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
|
||||||
|
+ seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ);
|
||||||
|
|
||||||
|
if (l4proto->print_conntrack)
|
||||||
|
l4proto->print_conntrack(s, ct);
|
||||||
|
@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *
|
||||||
|
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
|
||||||
|
goto release;
|
||||||
|
|
||||||
|
- if (test_bit(IPS_ASSURED_BIT, &ct->status))
|
||||||
|
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
|
||||||
|
+ seq_puts(s, "[OFFLOAD] ");
|
||||||
|
+ else if (test_bit(IPS_ASSURED_BIT, &ct->status))
|
||||||
|
seq_puts(s, "[ASSURED] ");
|
||||||
|
|
||||||
|
if (seq_has_overflowed(s))
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,586 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sun, 7 Jan 2018 01:04:11 +0100
|
||||||
|
Subject: [PATCH] netfilter: add generic flow table infrastructure
|
||||||
|
|
||||||
|
This patch defines the API to interact with flow tables; this allows us to
|
||||||
|
add, delete and look up entries in the flow table. This also adds the
|
||||||
|
generic garbage collection code that removes entries that have expired, i.e. no
|
||||||
|
traffic has been seen for a while.
|
||||||
|
|
||||||
|
Users of the flow table infrastructure can delete entries via
|
||||||
|
flow_offload_dead(), which sets the dying bit; this signals the garbage
|
||||||
|
collector to release an entry from user context.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
create mode 100644 net/netfilter/nf_flow_table.c
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -1,7 +1,12 @@
|
||||||
|
#ifndef _NF_FLOW_TABLE_H
|
||||||
|
#define _NF_FLOW_TABLE_H
|
||||||
|
|
||||||
|
+#include <linux/in.h>
|
||||||
|
+#include <linux/in6.h>
|
||||||
|
+#include <linux/netdevice.h>
|
||||||
|
#include <linux/rhashtable.h>
|
||||||
|
+#include <linux/rcupdate.h>
|
||||||
|
+#include <net/dst.h>
|
||||||
|
|
||||||
|
struct nf_flowtable;
|
||||||
|
|
||||||
|
@@ -20,4 +25,93 @@ struct nf_flowtable {
|
||||||
|
struct delayed_work gc_work;
|
||||||
|
};
|
||||||
|
|
||||||
|
+enum flow_offload_tuple_dir {
|
||||||
|
+ FLOW_OFFLOAD_DIR_ORIGINAL,
|
||||||
|
+ FLOW_OFFLOAD_DIR_REPLY,
|
||||||
|
+ __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY,
|
||||||
|
+};
|
||||||
|
+#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1)
|
||||||
|
+
|
||||||
|
+struct flow_offload_tuple {
|
||||||
|
+ union {
|
||||||
|
+ struct in_addr src_v4;
|
||||||
|
+ struct in6_addr src_v6;
|
||||||
|
+ };
|
||||||
|
+ union {
|
||||||
|
+ struct in_addr dst_v4;
|
||||||
|
+ struct in6_addr dst_v6;
|
||||||
|
+ };
|
||||||
|
+ struct {
|
||||||
|
+ __be16 src_port;
|
||||||
|
+ __be16 dst_port;
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ int iifidx;
|
||||||
|
+
|
||||||
|
+ u8 l3proto;
|
||||||
|
+ u8 l4proto;
|
||||||
|
+ u8 dir;
|
||||||
|
+
|
||||||
|
+ int oifidx;
|
||||||
|
+
|
||||||
|
+ struct dst_entry *dst_cache;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct flow_offload_tuple_rhash {
|
||||||
|
+ struct rhash_head node;
|
||||||
|
+ struct flow_offload_tuple tuple;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+#define FLOW_OFFLOAD_SNAT 0x1
|
||||||
|
+#define FLOW_OFFLOAD_DNAT 0x2
|
||||||
|
+#define FLOW_OFFLOAD_DYING 0x4
|
||||||
|
+
|
||||||
|
+struct flow_offload {
|
||||||
|
+ struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
|
||||||
|
+ u32 flags;
|
||||||
|
+ union {
|
||||||
|
+ /* Your private driver data here. */
|
||||||
|
+ u32 timeout;
|
||||||
|
+ };
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+#define NF_FLOW_TIMEOUT (30 * HZ)
|
||||||
|
+
|
||||||
|
+struct nf_flow_route {
|
||||||
|
+ struct {
|
||||||
|
+ struct dst_entry *dst;
|
||||||
|
+ int ifindex;
|
||||||
|
+ } tuple[FLOW_OFFLOAD_DIR_MAX];
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
|
||||||
|
+ struct nf_flow_route *route);
|
||||||
|
+void flow_offload_free(struct flow_offload *flow);
|
||||||
|
+
|
||||||
|
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
|
||||||
|
+void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow);
|
||||||
|
+struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||||
|
+ struct flow_offload_tuple *tuple);
|
||||||
|
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||||
|
+ void (*iter)(struct flow_offload *flow, void *data),
|
||||||
|
+ void *data);
|
||||||
|
+void nf_flow_offload_work_gc(struct work_struct *work);
|
||||||
|
+extern const struct rhashtable_params nf_flow_offload_rhash_params;
|
||||||
|
+
|
||||||
|
+void flow_offload_dead(struct flow_offload *flow);
|
||||||
|
+
|
||||||
|
+int nf_flow_snat_port(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ u8 protocol, enum flow_offload_tuple_dir dir);
|
||||||
|
+int nf_flow_dnat_port(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ u8 protocol, enum flow_offload_tuple_dir dir);
|
||||||
|
+
|
||||||
|
+struct flow_ports {
|
||||||
|
+ __be16 source, dest;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+#define MODULE_ALIAS_NF_FLOWTABLE(family) \
|
||||||
|
+ MODULE_ALIAS("nf-flowtable-" __stringify(family))
|
||||||
|
+
|
||||||
|
#endif /* _FLOW_OFFLOAD_H */
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -667,6 +667,13 @@ endif # NF_TABLES_NETDEV
|
||||||
|
|
||||||
|
endif # NF_TABLES
|
||||||
|
|
||||||
|
+config NF_FLOW_TABLE
|
||||||
|
+ tristate "Netfilter flow table module"
|
||||||
|
+ help
|
||||||
|
+ This option adds the flow table core infrastructure.
|
||||||
|
+
|
||||||
|
+ To compile it as a module, choose M here.
|
||||||
|
+
|
||||||
|
config NETFILTER_XTABLES
|
||||||
|
tristate "Netfilter Xtables support (required for ip_tables)"
|
||||||
|
default m if NETFILTER_ADVANCED=n
|
||||||
|
--- a/net/netfilter/Makefile
|
||||||
|
+++ b/net/netfilter/Makefile
|
||||||
|
@@ -110,6 +110,9 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_
|
||||||
|
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
|
||||||
|
obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
|
||||||
|
|
||||||
|
+# flow table infrastructure
|
||||||
|
+obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
|
||||||
|
+
|
||||||
|
# generic X tables
|
||||||
|
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
|
||||||
|
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/netfilter/nf_flow_table.c
|
||||||
|
@@ -0,0 +1,429 @@
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/init.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/rhashtable.h>
|
||||||
|
+#include <linux/netdevice.h>
|
||||||
|
+#include <net/netfilter/nf_flow_table.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_core.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_tuple.h>
|
||||||
|
+
|
||||||
|
+struct flow_offload_entry {
|
||||||
|
+ struct flow_offload flow;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ struct rcu_head rcu_head;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct flow_offload *
|
||||||
|
+flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_entry *entry;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+
|
||||||
|
+ if (unlikely(nf_ct_is_dying(ct) ||
|
||||||
|
+ !atomic_inc_not_zero(&ct->ct_general.use)))
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
|
||||||
|
+ if (!entry)
|
||||||
|
+ goto err_ct_refcnt;
|
||||||
|
+
|
||||||
|
+ flow = &entry->flow;
|
||||||
|
+
|
||||||
|
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
|
||||||
|
+ goto err_dst_cache_original;
|
||||||
|
+
|
||||||
|
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
|
||||||
|
+ goto err_dst_cache_reply;
|
||||||
|
+
|
||||||
|
+ entry->ct = ct;
|
||||||
|
+
|
||||||
|
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
|
||||||
|
+
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
|
||||||
|
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
|
||||||
|
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
|
||||||
|
+
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
|
||||||
|
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
|
||||||
|
+
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
|
||||||
|
+ FLOW_OFFLOAD_DIR_ORIGINAL;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
|
||||||
|
+ FLOW_OFFLOAD_DIR_REPLY;
|
||||||
|
+
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
|
||||||
|
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
|
||||||
|
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
|
||||||
|
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
|
||||||
|
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
|
||||||
|
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
|
||||||
|
+
|
||||||
|
+ if (ct->status & IPS_SRC_NAT)
|
||||||
|
+ flow->flags |= FLOW_OFFLOAD_SNAT;
|
||||||
|
+ else if (ct->status & IPS_DST_NAT)
|
||||||
|
+ flow->flags |= FLOW_OFFLOAD_DNAT;
|
||||||
|
+
|
||||||
|
+ return flow;
|
||||||
|
+
|
||||||
|
+err_dst_cache_reply:
|
||||||
|
+ dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
|
||||||
|
+err_dst_cache_original:
|
||||||
|
+ kfree(entry);
|
||||||
|
+err_ct_refcnt:
|
||||||
|
+ nf_ct_put(ct);
|
||||||
|
+
|
||||||
|
+ return NULL;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_alloc);
|
||||||
|
+
|
||||||
|
+void flow_offload_free(struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_entry *e;
|
||||||
|
+
|
||||||
|
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
|
||||||
|
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
|
||||||
|
+ e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
+ kfree(e);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_free);
|
||||||
|
+
|
||||||
|
+void flow_offload_dead(struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ flow->flags |= FLOW_OFFLOAD_DYING;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_dead);
|
||||||
|
+
|
||||||
|
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ flow->timeout = (u32)jiffies;
|
||||||
|
+
|
||||||
|
+ rhashtable_insert_fast(&flow_table->rhashtable,
|
||||||
|
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+ rhashtable_insert_fast(&flow_table->rhashtable,
|
||||||
|
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_add);
|
||||||
|
+
|
||||||
|
+void flow_offload_del(struct nf_flowtable *flow_table,
|
||||||
|
+ struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_entry *e;
|
||||||
|
+
|
||||||
|
+ rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+ rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+
|
||||||
|
+ e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
+ kfree_rcu(e, rcu_head);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_del);
|
||||||
|
+
|
||||||
|
+struct flow_offload_tuple_rhash *
|
||||||
|
+flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||||
|
+ struct flow_offload_tuple *tuple)
|
||||||
|
+{
|
||||||
|
+ return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_lookup);
|
||||||
|
+
|
||||||
|
+static void nf_flow_release_ct(const struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_entry *e;
|
||||||
|
+
|
||||||
|
+ e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
+ nf_ct_delete(e->ct, 0, 0);
|
||||||
|
+ nf_ct_put(e->ct);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||||
|
+ void (*iter)(struct flow_offload *flow, void *data),
|
||||||
|
+ void *data)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
+ struct rhashtable_iter hti;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
|
||||||
|
+ if (err)
|
||||||
|
+ return err;
|
||||||
|
+
|
||||||
|
+ rhashtable_walk_start(&hti);
|
||||||
|
+
|
||||||
|
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
|
||||||
|
+ if (IS_ERR(tuplehash)) {
|
||||||
|
+ err = PTR_ERR(tuplehash);
|
||||||
|
+ if (err != -EAGAIN)
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ if (tuplehash->tuple.dir)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
|
||||||
|
+
|
||||||
|
+ iter(flow, data);
|
||||||
|
+ }
|
||||||
|
+out:
|
||||||
|
+ rhashtable_walk_stop(&hti);
|
||||||
|
+ rhashtable_walk_exit(&hti);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
|
||||||
|
+
|
||||||
|
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ return (__s32)(flow->timeout - (u32)jiffies) <= 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline bool nf_flow_is_dying(const struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ return flow->flags & FLOW_OFFLOAD_DYING;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void nf_flow_offload_work_gc(struct work_struct *work)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
+ struct nf_flowtable *flow_table;
|
||||||
|
+ struct rhashtable_iter hti;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
|
||||||
|
+
|
||||||
|
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
|
||||||
|
+ if (err)
|
||||||
|
+ goto schedule;
|
||||||
|
+
|
||||||
|
+ rhashtable_walk_start(&hti);
|
||||||
|
+
|
||||||
|
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
|
||||||
|
+ if (IS_ERR(tuplehash)) {
|
||||||
|
+ err = PTR_ERR(tuplehash);
|
||||||
|
+ if (err != -EAGAIN)
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ if (tuplehash->tuple.dir)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
|
||||||
|
+
|
||||||
|
+ if (nf_flow_has_expired(flow) ||
|
||||||
|
+ nf_flow_is_dying(flow)) {
|
||||||
|
+ flow_offload_del(flow_table, flow);
|
||||||
|
+ nf_flow_release_ct(flow);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+out:
|
||||||
|
+ rhashtable_walk_stop(&hti);
|
||||||
|
+ rhashtable_walk_exit(&hti);
|
||||||
|
+schedule:
|
||||||
|
+ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
|
||||||
|
+
|
||||||
|
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
|
||||||
|
+{
|
||||||
|
+ const struct flow_offload_tuple *tuple = data;
|
||||||
|
+
|
||||||
|
+ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
|
||||||
|
+{
|
||||||
|
+ const struct flow_offload_tuple_rhash *tuplehash = data;
|
||||||
|
+
|
||||||
|
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
|
||||||
|
+ const void *ptr)
|
||||||
|
+{
|
||||||
|
+ const struct flow_offload_tuple *tuple = arg->key;
|
||||||
|
+ const struct flow_offload_tuple_rhash *x = ptr;
|
||||||
|
+
|
||||||
|
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+const struct rhashtable_params nf_flow_offload_rhash_params = {
|
||||||
|
+ .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
|
||||||
|
+ .hashfn = flow_offload_hash,
|
||||||
|
+ .obj_hashfn = flow_offload_hash_obj,
|
||||||
|
+ .obj_cmpfn = flow_offload_hash_cmp,
|
||||||
|
+ .automatic_shrinking = true,
|
||||||
|
+};
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ __be16 port, __be16 new_port)
|
||||||
|
+{
|
||||||
|
+ struct tcphdr *tcph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ __be16 port, __be16 new_port)
|
||||||
|
+{
|
||||||
|
+ struct udphdr *udph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ udph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||||
|
+ inet_proto_csum_replace2(&udph->check, skb, port,
|
||||||
|
+ new_port, true);
|
||||||
|
+ if (!udph->check)
|
||||||
|
+ udph->check = CSUM_MANGLED_0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ u8 protocol, __be16 port, __be16 new_port)
|
||||||
|
+{
|
||||||
|
+ switch (protocol) {
|
||||||
|
+ case IPPROTO_TCP:
|
||||||
|
+ if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ case IPPROTO_UDP:
|
||||||
|
+ if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int nf_flow_snat_port(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ u8 protocol, enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct flow_ports *hdr;
|
||||||
|
+ __be16 port, new_port;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ hdr = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ port = hdr->source;
|
||||||
|
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
|
||||||
|
+ hdr->source = new_port;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ port = hdr->dest;
|
||||||
|
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
|
||||||
|
+ hdr->dest = new_port;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_snat_port);
|
||||||
|
+
|
||||||
|
+int nf_flow_dnat_port(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ u8 protocol, enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct flow_ports *hdr;
|
||||||
|
+ __be16 port, new_port;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ hdr = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ port = hdr->dest;
|
||||||
|
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
|
||||||
|
+ hdr->dest = new_port;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ port = hdr->source;
|
||||||
|
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
|
||||||
|
+ hdr->source = new_port;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
|
||||||
|
+
|
||||||
|
+MODULE_LICENSE("GPL");
|
||||||
|
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
|
@ -0,0 +1,334 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sun, 7 Jan 2018 01:04:15 +0100
|
||||||
|
Subject: [PATCH] netfilter: flow table support for IPv4
|
||||||
|
|
||||||
|
This patch adds the IPv4 flow table type, that implements the datapath
|
||||||
|
flow table to forward IPv4 traffic. Rationale is:
|
||||||
|
|
||||||
|
1) Look up the packet in the flow table, from the ingress hook.
|
||||||
|
2) If there's a hit, decrement ttl and pass it on to the neighbour layer
|
||||||
|
for transmission.
|
||||||
|
3) If there's a miss, packet is passed up to the classic forwarding
|
||||||
|
path.
|
||||||
|
|
||||||
|
This patch also supports layer 3 source and destination NAT.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
create mode 100644 net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/Kconfig
|
||||||
|
+++ b/net/ipv4/netfilter/Kconfig
|
||||||
|
@@ -78,6 +78,14 @@ config NF_TABLES_ARP
|
||||||
|
|
||||||
|
endif # NF_TABLES
|
||||||
|
|
||||||
|
+config NF_FLOW_TABLE_IPV4
|
||||||
|
+ select NF_FLOW_TABLE
|
||||||
|
+ tristate "Netfilter flow table IPv4 module"
|
||||||
|
+ help
|
||||||
|
+ This option adds the flow table IPv4 support.
|
||||||
|
+
|
||||||
|
+ To compile it as a module, choose M here.
|
||||||
|
+
|
||||||
|
config NF_DUP_IPV4
|
||||||
|
tristate "Netfilter IPv4 packet duplication to alternate destination"
|
||||||
|
depends on !NF_CONNTRACK || NF_CONNTRACK
|
||||||
|
--- a/net/ipv4/netfilter/Makefile
|
||||||
|
+++ b/net/ipv4/netfilter/Makefile
|
||||||
|
@@ -43,6 +43,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redi
|
||||||
|
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
|
||||||
|
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
|
||||||
|
|
||||||
|
+# flow table support
|
||||||
|
+obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
|
||||||
|
+
|
||||||
|
# generic IP tables
|
||||||
|
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
|
||||||
|
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
@@ -0,0 +1,283 @@
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/init.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/rhashtable.h>
|
||||||
|
+#include <linux/ip.h>
|
||||||
|
+#include <linux/netdevice.h>
|
||||||
|
+#include <net/ip.h>
|
||||||
|
+#include <net/neighbour.h>
|
||||||
|
+#include <net/netfilter/nf_flow_table.h>
|
||||||
|
+#include <net/netfilter/nf_tables.h>
|
||||||
|
+/* For layer 4 checksum field offset. */
|
||||||
|
+#include <linux/tcp.h>
|
||||||
|
+#include <linux/udp.h>
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ __be32 addr, __be32 new_addr)
|
||||||
|
+{
|
||||||
|
+ struct tcphdr *tcph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ __be32 addr, __be32 new_addr)
|
||||||
|
+{
|
||||||
|
+ struct udphdr *udph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ udph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||||
|
+ inet_proto_csum_replace4(&udph->check, skb, addr,
|
||||||
|
+ new_addr, true);
|
||||||
|
+ if (!udph->check)
|
||||||
|
+ udph->check = CSUM_MANGLED_0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
|
||||||
|
+ unsigned int thoff, __be32 addr,
|
||||||
|
+ __be32 new_addr)
|
||||||
|
+{
|
||||||
|
+ switch (iph->protocol) {
|
||||||
|
+ case IPPROTO_TCP:
|
||||||
|
+ if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ case IPPROTO_UDP:
|
||||||
|
+ if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
+ struct iphdr *iph, unsigned int thoff,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ __be32 addr, new_addr;
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ addr = iph->saddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
|
||||||
|
+ iph->saddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ addr = iph->daddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
|
||||||
|
+ iph->daddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ csum_replace4(&iph->check, addr, new_addr);
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
+ struct iphdr *iph, unsigned int thoff,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ __be32 addr, new_addr;
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ addr = iph->daddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
|
||||||
|
+ iph->daddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ addr = iph->saddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
|
||||||
|
+ iph->saddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct iphdr *iph = ip_hdr(skb);
|
||||||
|
+ unsigned int thoff = iph->ihl * 4;
|
||||||
|
+
|
||||||
|
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
|
||||||
|
+ (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||||
|
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||||
|
+ return -1;
|
||||||
|
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
|
||||||
|
+ (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||||
|
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool ip_has_options(unsigned int thoff)
|
||||||
|
+{
|
||||||
|
+ return thoff != sizeof(struct iphdr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
|
||||||
|
+ struct flow_offload_tuple *tuple)
|
||||||
|
+{
|
||||||
|
+ struct flow_ports *ports;
|
||||||
|
+ unsigned int thoff;
|
||||||
|
+ struct iphdr *iph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, sizeof(*iph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ iph = ip_hdr(skb);
|
||||||
|
+ thoff = iph->ihl * 4;
|
||||||
|
+
|
||||||
|
+ if (ip_is_fragment(iph) ||
|
||||||
|
+ unlikely(ip_has_options(thoff)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ if (iph->protocol != IPPROTO_TCP &&
|
||||||
|
+ iph->protocol != IPPROTO_UDP)
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ thoff = iph->ihl * 4;
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||||
|
+
|
||||||
|
+ tuple->src_v4.s_addr = iph->saddr;
|
||||||
|
+ tuple->dst_v4.s_addr = iph->daddr;
|
||||||
|
+ tuple->src_port = ports->source;
|
||||||
|
+ tuple->dst_port = ports->dest;
|
||||||
|
+ tuple->l3proto = AF_INET;
|
||||||
|
+ tuple->l4proto = iph->protocol;
|
||||||
|
+ tuple->iifidx = dev->ifindex;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Based on ip_exceeds_mtu(). */
|
||||||
|
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
+{
|
||||||
|
+ if (skb->len <= mtu)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
|
||||||
|
+{
|
||||||
|
+ u32 mtu;
|
||||||
|
+
|
||||||
|
+ mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
|
||||||
|
+ if (__nf_flow_exceeds_mtu(skb, mtu))
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int
|
||||||
|
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
+ struct nf_flowtable *flow_table = priv;
|
||||||
|
+ struct flow_offload_tuple tuple = {};
|
||||||
|
+ enum flow_offload_tuple_dir dir;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ struct net_device *outdev;
|
||||||
|
+ const struct rtable *rt;
|
||||||
|
+ struct iphdr *iph;
|
||||||
|
+ __be32 nexthop;
|
||||||
|
+
|
||||||
|
+ if (skb->protocol != htons(ETH_P_IP))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||||
|
+ if (tuplehash == NULL)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
|
||||||
|
+ if (!outdev)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ dir = tuplehash->tuple.dir;
|
||||||
|
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
+
|
||||||
|
+ rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (skb_try_make_writable(skb, sizeof(*iph)))
|
||||||
|
+ return NF_DROP;
|
||||||
|
+
|
||||||
|
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
|
||||||
|
+ nf_flow_nat_ip(flow, skb, dir) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+
|
||||||
|
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
|
||||||
|
+ iph = ip_hdr(skb);
|
||||||
|
+ ip_decrease_ttl(iph);
|
||||||
|
+
|
||||||
|
+ skb->dev = outdev;
|
||||||
|
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||||
|
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
|
||||||
|
+
|
||||||
|
+ return NF_STOLEN;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct nf_flowtable_type flowtable_ipv4 = {
|
||||||
|
+ .family = NFPROTO_IPV4,
|
||||||
|
+ .params = &nf_flow_offload_rhash_params,
|
||||||
|
+ .gc = nf_flow_offload_work_gc,
|
||||||
|
+ .hook = nf_flow_offload_ip_hook,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int __init nf_flow_ipv4_module_init(void)
|
||||||
|
+{
|
||||||
|
+ nft_register_flowtable_type(&flowtable_ipv4);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __exit nf_flow_ipv4_module_exit(void)
|
||||||
|
+{
|
||||||
|
+ nft_unregister_flowtable_type(&flowtable_ipv4);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+module_init(nf_flow_ipv4_module_init);
|
||||||
|
+module_exit(nf_flow_ipv4_module_exit);
|
||||||
|
+
|
||||||
|
+MODULE_LICENSE("GPL");
|
||||||
|
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
||||||
|
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
|
|
@ -0,0 +1,354 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sun, 7 Jan 2018 01:04:19 +0100
|
||||||
|
Subject: [PATCH] netfilter: flow table support for IPv6
|
||||||
|
|
||||||
|
This patch adds the IPv6 flow table type, that implements the datapath
|
||||||
|
flow table to forward IPv6 traffic.
|
||||||
|
|
||||||
|
This patch exports ip6_dst_mtu_forward() that is required to check for
|
||||||
|
mtu to pass up packets that need PMTUD handling to the classic
|
||||||
|
forwarding path.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
create mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
|
||||||
|
--- a/include/net/ipv6.h
|
||||||
|
+++ b/include/net/ipv6.h
|
||||||
|
@@ -889,6 +889,8 @@ static inline struct sk_buff *ip6_finish
|
||||||
|
&inet6_sk(sk)->cork);
|
||||||
|
}
|
||||||
|
|
||||||
|
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
|
||||||
|
+
|
||||||
|
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
|
||||||
|
struct flowi6 *fl6);
|
||||||
|
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
|
||||||
|
--- a/net/ipv6/ip6_output.c
|
||||||
|
+++ b/net/ipv6/ip6_output.c
|
||||||
|
@@ -381,7 +381,7 @@ static inline int ip6_forward_finish(str
|
||||||
|
return dst_output(net, sk, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
|
||||||
|
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
|
||||||
|
{
|
||||||
|
unsigned int mtu;
|
||||||
|
struct inet6_dev *idev;
|
||||||
|
@@ -401,6 +401,7 @@ static unsigned int ip6_dst_mtu_forward(
|
||||||
|
|
||||||
|
return mtu;
|
||||||
|
}
|
||||||
|
+EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
|
||||||
|
|
||||||
|
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
{
|
||||||
|
--- a/net/ipv6/netfilter/Kconfig
|
||||||
|
+++ b/net/ipv6/netfilter/Kconfig
|
||||||
|
@@ -99,6 +99,14 @@ config NFT_FIB_IPV6
|
||||||
|
endif # NF_TABLES_IPV6
|
||||||
|
endif # NF_TABLES
|
||||||
|
|
||||||
|
+config NF_FLOW_TABLE_IPV6
|
||||||
|
+ select NF_FLOW_TABLE
|
||||||
|
+ tristate "Netfilter flow table IPv6 module"
|
||||||
|
+ help
|
||||||
|
+ This option adds the flow table IPv6 support.
|
||||||
|
+
|
||||||
|
+ To compile it as a module, choose M here.
|
||||||
|
+
|
||||||
|
config NF_DUP_IPV6
|
||||||
|
tristate "Netfilter IPv6 packet duplication to alternate destination"
|
||||||
|
depends on !NF_CONNTRACK || NF_CONNTRACK
|
||||||
|
--- a/net/ipv6/netfilter/Makefile
|
||||||
|
+++ b/net/ipv6/netfilter/Makefile
|
||||||
|
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redi
|
||||||
|
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
|
||||||
|
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
|
||||||
|
|
||||||
|
+# flow table support
|
||||||
|
+obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
|
||||||
|
+
|
||||||
|
# matches
|
||||||
|
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
|
||||||
|
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
@@ -0,0 +1,277 @@
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/init.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/rhashtable.h>
|
||||||
|
+#include <linux/ipv6.h>
|
||||||
|
+#include <linux/netdevice.h>
|
||||||
|
+#include <linux/ipv6.h>
|
||||||
|
+#include <net/ipv6.h>
|
||||||
|
+#include <net/ip6_route.h>
|
||||||
|
+#include <net/neighbour.h>
|
||||||
|
+#include <net/netfilter/nf_flow_table.h>
|
||||||
|
+#include <net/netfilter/nf_tables.h>
|
||||||
|
+/* For layer 4 checksum field offset. */
|
||||||
|
+#include <linux/tcp.h>
|
||||||
|
+#include <linux/udp.h>
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ struct in6_addr *addr,
|
||||||
|
+ struct in6_addr *new_addr)
|
||||||
|
+{
|
||||||
|
+ struct tcphdr *tcph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
|
||||||
|
+ new_addr->s6_addr32, true);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ struct in6_addr *addr,
|
||||||
|
+ struct in6_addr *new_addr)
|
||||||
|
+{
|
||||||
|
+ struct udphdr *udph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ udph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||||
|
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
|
||||||
|
+ new_addr->s6_addr32, true);
|
||||||
|
+ if (!udph->check)
|
||||||
|
+ udph->check = CSUM_MANGLED_0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||||
|
+ unsigned int thoff, struct in6_addr *addr,
|
||||||
|
+ struct in6_addr *new_addr)
|
||||||
|
+{
|
||||||
|
+ switch (ip6h->nexthdr) {
|
||||||
|
+ case IPPROTO_TCP:
|
||||||
|
+ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ case IPPROTO_UDP:
|
||||||
|
+ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||||
|
+ unsigned int thoff,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct in6_addr addr, new_addr;
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ addr = ip6h->saddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
|
||||||
|
+ ip6h->saddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ addr = ip6h->daddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
|
||||||
|
+ ip6h->daddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||||
|
+ unsigned int thoff,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct in6_addr addr, new_addr;
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ addr = ip6h->daddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
|
||||||
|
+ ip6h->daddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ addr = ip6h->saddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
|
||||||
|
+ ip6h->saddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
|
||||||
|
+ unsigned int thoff = sizeof(*ip6h);
|
||||||
|
+
|
||||||
|
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
|
||||||
|
+ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||||
|
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||||
|
+ return -1;
|
||||||
|
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
|
||||||
|
+ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||||
|
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
|
||||||
|
+ struct flow_offload_tuple *tuple)
|
||||||
|
+{
|
||||||
|
+ struct flow_ports *ports;
|
||||||
|
+ struct ipv6hdr *ip6h;
|
||||||
|
+ unsigned int thoff;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ ip6h = ipv6_hdr(skb);
|
||||||
|
+
|
||||||
|
+ if (ip6h->nexthdr != IPPROTO_TCP &&
|
||||||
|
+ ip6h->nexthdr != IPPROTO_UDP)
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ thoff = sizeof(*ip6h);
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||||
|
+
|
||||||
|
+ tuple->src_v6 = ip6h->saddr;
|
||||||
|
+ tuple->dst_v6 = ip6h->daddr;
|
||||||
|
+ tuple->src_port = ports->source;
|
||||||
|
+ tuple->dst_port = ports->dest;
|
||||||
|
+ tuple->l3proto = AF_INET6;
|
||||||
|
+ tuple->l4proto = ip6h->nexthdr;
|
||||||
|
+ tuple->iifidx = dev->ifindex;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Based on ip_exceeds_mtu(). */
|
||||||
|
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
+{
|
||||||
|
+ if (skb->len <= mtu)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
|
||||||
|
+{
|
||||||
|
+ u32 mtu;
|
||||||
|
+
|
||||||
|
+ mtu = ip6_dst_mtu_forward(&rt->dst);
|
||||||
|
+ if (__nf_flow_exceeds_mtu(skb, mtu))
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int
|
||||||
|
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
+ struct nf_flowtable *flow_table = priv;
|
||||||
|
+ struct flow_offload_tuple tuple = {};
|
||||||
|
+ enum flow_offload_tuple_dir dir;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ struct net_device *outdev;
|
||||||
|
+ struct in6_addr *nexthop;
|
||||||
|
+ struct ipv6hdr *ip6h;
|
||||||
|
+ struct rt6_info *rt;
|
||||||
|
+
|
||||||
|
+ if (skb->protocol != htons(ETH_P_IPV6))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||||
|
+ if (tuplehash == NULL)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
|
||||||
|
+ if (!outdev)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ dir = tuplehash->tuple.dir;
|
||||||
|
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
+
|
||||||
|
+ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (skb_try_make_writable(skb, sizeof(*ip6h)))
|
||||||
|
+ return NF_DROP;
|
||||||
|
+
|
||||||
|
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
|
||||||
|
+ nf_flow_nat_ipv6(flow, skb, dir) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+
|
||||||
|
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
|
||||||
|
+ ip6h = ipv6_hdr(skb);
|
||||||
|
+ ip6h->hop_limit--;
|
||||||
|
+
|
||||||
|
+ skb->dev = outdev;
|
||||||
|
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||||
|
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
|
||||||
|
+
|
||||||
|
+ return NF_STOLEN;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct nf_flowtable_type flowtable_ipv6 = {
|
||||||
|
+ .family = NFPROTO_IPV6,
|
||||||
|
+ .params = &nf_flow_offload_rhash_params,
|
||||||
|
+ .gc = nf_flow_offload_work_gc,
|
||||||
|
+ .hook = nf_flow_offload_ipv6_hook,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int __init nf_flow_ipv6_module_init(void)
|
||||||
|
+{
|
||||||
|
+ nft_register_flowtable_type(&flowtable_ipv6);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __exit nf_flow_ipv6_module_exit(void)
|
||||||
|
+{
|
||||||
|
+ nft_unregister_flowtable_type(&flowtable_ipv6);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+module_init(nf_flow_ipv6_module_init);
|
||||||
|
+module_exit(nf_flow_ipv6_module_exit);
|
||||||
|
+
|
||||||
|
+MODULE_LICENSE("GPL");
|
||||||
|
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
||||||
|
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
|
|
@ -0,0 +1,141 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sun, 7 Jan 2018 01:04:22 +0100
|
||||||
|
Subject: [PATCH] netfilter: flow table support for the mixed IPv4/IPv6 family
|
||||||
|
|
||||||
|
This patch adds the mixed IPv4/IPv6 (inet) flow table type, which implements
|
||||||
|
the datapath flow table to forward both IPv4 and IPv6 traffic.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
create mode 100644 net/netfilter/nf_flow_table_inet.c
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -111,6 +111,11 @@ struct flow_ports {
|
||||||
|
__be16 source, dest;
|
||||||
|
};
|
||||||
|
|
||||||
|
+unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state);
|
||||||
|
+unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state);
|
||||||
|
+
|
||||||
|
#define MODULE_ALIAS_NF_FLOWTABLE(family) \
|
||||||
|
MODULE_ALIAS("nf-flowtable-" __stringify(family))
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
@@ -202,7 +202,7 @@ static bool nf_flow_exceeds_mtu(struct s
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static unsigned int
|
||||||
|
+unsigned int
|
||||||
|
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
{
|
||||||
|
@@ -254,6 +254,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||||
|
|
||||||
|
return NF_STOLEN;
|
||||||
|
}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
|
||||||
|
|
||||||
|
static struct nf_flowtable_type flowtable_ipv4 = {
|
||||||
|
.family = NFPROTO_IPV4,
|
||||||
|
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
@@ -196,7 +196,7 @@ static bool nf_flow_exceeds_mtu(struct s
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static unsigned int
|
||||||
|
+unsigned int
|
||||||
|
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
{
|
||||||
|
@@ -248,6 +248,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||||
|
|
||||||
|
return NF_STOLEN;
|
||||||
|
}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
|
||||||
|
|
||||||
|
static struct nf_flowtable_type flowtable_ipv6 = {
|
||||||
|
.family = NFPROTO_IPV6,
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -667,6 +667,14 @@ endif # NF_TABLES_NETDEV
|
||||||
|
|
||||||
|
endif # NF_TABLES
|
||||||
|
|
||||||
|
+config NF_FLOW_TABLE_INET
|
||||||
|
+ select NF_FLOW_TABLE
|
||||||
|
+ tristate "Netfilter flow table mixed IPv4/IPv6 module"
|
||||||
|
+ help
|
||||||
|
+ This option adds the flow table mixed IPv4/IPv6 support.
|
||||||
|
+
|
||||||
|
+ To compile it as a module, choose M here.
|
||||||
|
+
|
||||||
|
config NF_FLOW_TABLE
|
||||||
|
tristate "Netfilter flow table module"
|
||||||
|
help
|
||||||
|
--- a/net/netfilter/Makefile
|
||||||
|
+++ b/net/netfilter/Makefile
|
||||||
|
@@ -112,6 +112,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_
|
||||||
|
|
||||||
|
# flow table infrastructure
|
||||||
|
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
|
||||||
|
+obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
|
||||||
|
|
||||||
|
# generic X tables
|
||||||
|
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/netfilter/nf_flow_table_inet.c
|
||||||
|
@@ -0,0 +1,48 @@
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/init.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/rhashtable.h>
|
||||||
|
+#include <net/netfilter/nf_flow_table.h>
|
||||||
|
+#include <net/netfilter/nf_tables.h>
|
||||||
|
+
|
||||||
|
+static unsigned int
|
||||||
|
+nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ switch (skb->protocol) {
|
||||||
|
+ case htons(ETH_P_IP):
|
||||||
|
+ return nf_flow_offload_ip_hook(priv, skb, state);
|
||||||
|
+ case htons(ETH_P_IPV6):
|
||||||
|
+ return nf_flow_offload_ipv6_hook(priv, skb, state);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct nf_flowtable_type flowtable_inet = {
|
||||||
|
+ .family = NFPROTO_INET,
|
||||||
|
+ .params = &nf_flow_offload_rhash_params,
|
||||||
|
+ .gc = nf_flow_offload_work_gc,
|
||||||
|
+ .hook = nf_flow_offload_inet_hook,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int __init nf_flow_inet_module_init(void)
|
||||||
|
+{
|
||||||
|
+ nft_register_flowtable_type(&flowtable_inet);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __exit nf_flow_inet_module_exit(void)
|
||||||
|
+{
|
||||||
|
+ nft_unregister_flowtable_type(&flowtable_inet);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+module_init(nf_flow_inet_module_init);
|
||||||
|
+module_exit(nf_flow_inet_module_exit);
|
||||||
|
+
|
||||||
|
+MODULE_LICENSE("GPL");
|
||||||
|
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
||||||
|
+MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
|
|
@ -0,0 +1,332 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Sun, 7 Jan 2018 01:04:26 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: flow offload expression
|
||||||
|
|
||||||
|
Add new instruction for the nf_tables VM that allows us to specify what
|
||||||
|
flows are offloaded into a given flow table via name. This new
|
||||||
|
instruction creates the flow entry and adds it to the flow table.
|
||||||
|
|
||||||
|
Only established flows, i.e. we have seen traffic in both directions, are
|
||||||
|
added to the flow table. You can still decide to offload entries at a
|
||||||
|
later stage via packet counting or checking the ct status in case you
|
||||||
|
want to offload assured conntracks.
|
||||||
|
|
||||||
|
This new extension depends on the conntrack subsystem.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
create mode 100644 net/netfilter/nft_flow_offload.c
|
||||||
|
|
||||||
|
--- a/include/uapi/linux/netfilter/nf_tables.h
|
||||||
|
+++ b/include/uapi/linux/netfilter/nf_tables.h
|
||||||
|
@@ -957,6 +957,17 @@ enum nft_ct_attributes {
|
||||||
|
};
|
||||||
|
#define NFTA_CT_MAX (__NFTA_CT_MAX - 1)
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * enum nft_flow_attributes - ct offload expression attributes
|
||||||
|
+ * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING)
|
||||||
|
+ */
|
||||||
|
+enum nft_offload_attributes {
|
||||||
|
+ NFTA_FLOW_UNSPEC,
|
||||||
|
+ NFTA_FLOW_TABLE_NAME,
|
||||||
|
+ __NFTA_FLOW_MAX,
|
||||||
|
+};
|
||||||
|
+#define NFTA_FLOW_MAX (__NFTA_FLOW_MAX - 1)
|
||||||
|
+
|
||||||
|
enum nft_limit_type {
|
||||||
|
NFT_LIMIT_PKTS,
|
||||||
|
NFT_LIMIT_PKT_BYTES
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -515,6 +515,13 @@ config NFT_CT
|
||||||
|
This option adds the "ct" expression that you can use to match
|
||||||
|
connection tracking information such as the flow state.
|
||||||
|
|
||||||
|
+config NFT_FLOW_OFFLOAD
|
||||||
|
+ depends on NF_CONNTRACK
|
||||||
|
+ tristate "Netfilter nf_tables hardware flow offload module"
|
||||||
|
+ help
|
||||||
|
+ This option adds the "flow_offload" expression that you can use to
|
||||||
|
+ choose what flows are placed into the hardware.
|
||||||
|
+
|
||||||
|
config NFT_SET_RBTREE
|
||||||
|
tristate "Netfilter nf_tables rbtree set module"
|
||||||
|
help
|
||||||
|
--- a/net/netfilter/Makefile
|
||||||
|
+++ b/net/netfilter/Makefile
|
||||||
|
@@ -87,6 +87,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o
|
||||||
|
obj-$(CONFIG_NFT_RT) += nft_rt.o
|
||||||
|
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
|
||||||
|
obj-$(CONFIG_NFT_CT) += nft_ct.o
|
||||||
|
+obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
|
||||||
|
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
|
||||||
|
obj-$(CONFIG_NFT_NAT) += nft_nat.o
|
||||||
|
obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/netfilter/nft_flow_offload.c
|
||||||
|
@@ -0,0 +1,264 @@
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+#include <linux/init.h>
|
||||||
|
+#include <linux/netlink.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/workqueue.h>
|
||||||
|
+#include <linux/spinlock.h>
|
||||||
|
+#include <linux/netfilter/nf_tables.h>
|
||||||
|
+#include <net/ip.h> /* for ipv4 options. */
|
||||||
|
+#include <net/netfilter/nf_tables.h>
|
||||||
|
+#include <net/netfilter/nf_tables_core.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_core.h>
|
||||||
|
+#include <linux/netfilter/nf_conntrack_common.h>
|
||||||
|
+#include <net/netfilter/nf_flow_table.h>
|
||||||
|
+
|
||||||
|
+struct nft_flow_offload {
|
||||||
|
+ struct nft_flowtable *flowtable;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int nft_flow_route(const struct nft_pktinfo *pkt,
|
||||||
|
+ const struct nf_conn *ct,
|
||||||
|
+ struct nf_flow_route *route,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct dst_entry *this_dst = skb_dst(pkt->skb);
|
||||||
|
+ struct dst_entry *other_dst = NULL;
|
||||||
|
+ struct flowi fl;
|
||||||
|
+
|
||||||
|
+ memset(&fl, 0, sizeof(fl));
|
||||||
|
+ switch (nft_pf(pkt)) {
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
|
||||||
|
+ if (!other_dst)
|
||||||
|
+ return -ENOENT;
|
||||||
|
+
|
||||||
|
+ route->tuple[dir].dst = this_dst;
|
||||||
|
+ route->tuple[dir].ifindex = nft_in(pkt)->ifindex;
|
||||||
|
+ route->tuple[!dir].dst = other_dst;
|
||||||
|
+ route->tuple[!dir].ifindex = nft_out(pkt)->ifindex;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool nft_flow_offload_skip(struct sk_buff *skb)
|
||||||
|
+{
|
||||||
|
+ struct ip_options *opt = &(IPCB(skb)->opt);
|
||||||
|
+
|
||||||
|
+ if (unlikely(opt->optlen))
|
||||||
|
+ return true;
|
||||||
|
+ if (skb_sec_path(skb))
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nft_flow_offload_eval(const struct nft_expr *expr,
|
||||||
|
+ struct nft_regs *regs,
|
||||||
|
+ const struct nft_pktinfo *pkt)
|
||||||
|
+{
|
||||||
|
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
|
||||||
|
+ struct nf_flowtable *flowtable = &priv->flowtable->data;
|
||||||
|
+ enum ip_conntrack_info ctinfo;
|
||||||
|
+ struct nf_flow_route route;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ enum ip_conntrack_dir dir;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ if (nft_flow_offload_skip(pkt->skb))
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ ct = nf_ct_get(pkt->skb, &ctinfo);
|
||||||
|
+ if (!ct)
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
|
||||||
|
+ case IPPROTO_TCP:
|
||||||
|
+ case IPPROTO_UDP:
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ goto out;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ if (ctinfo == IP_CT_NEW ||
|
||||||
|
+ ctinfo == IP_CT_RELATED)
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ dir = CTINFO2DIR(ctinfo);
|
||||||
|
+ if (nft_flow_route(pkt, ct, &route, dir) < 0)
|
||||||
|
+ goto err_flow_route;
|
||||||
|
+
|
||||||
|
+ flow = flow_offload_alloc(ct, &route);
|
||||||
|
+ if (!flow)
|
||||||
|
+ goto err_flow_alloc;
|
||||||
|
+
|
||||||
|
+ ret = flow_offload_add(flowtable, flow);
|
||||||
|
+ if (ret < 0)
|
||||||
|
+ goto err_flow_add;
|
||||||
|
+
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+err_flow_add:
|
||||||
|
+ flow_offload_free(flow);
|
||||||
|
+err_flow_alloc:
|
||||||
|
+ dst_release(route.tuple[!dir].dst);
|
||||||
|
+err_flow_route:
|
||||||
|
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
|
||||||
|
+out:
|
||||||
|
+ regs->verdict.code = NFT_BREAK;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nft_flow_offload_validate(const struct nft_ctx *ctx,
|
||||||
|
+ const struct nft_expr *expr,
|
||||||
|
+ const struct nft_data **data)
|
||||||
|
+{
|
||||||
|
+ unsigned int hook_mask = (1 << NF_INET_FORWARD);
|
||||||
|
+
|
||||||
|
+ return nft_chain_validate_hooks(ctx->chain, hook_mask);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nft_flow_offload_init(const struct nft_ctx *ctx,
|
||||||
|
+ const struct nft_expr *expr,
|
||||||
|
+ const struct nlattr * const tb[])
|
||||||
|
+{
|
||||||
|
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
|
||||||
|
+ u8 genmask = nft_genmask_next(ctx->net);
|
||||||
|
+ struct nft_flowtable *flowtable;
|
||||||
|
+
|
||||||
|
+ if (!tb[NFTA_FLOW_TABLE_NAME])
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ flowtable = nf_tables_flowtable_lookup(ctx->table,
|
||||||
|
+ tb[NFTA_FLOW_TABLE_NAME],
|
||||||
|
+ genmask);
|
||||||
|
+ if (IS_ERR(flowtable))
|
||||||
|
+ return PTR_ERR(flowtable);
|
||||||
|
+
|
||||||
|
+ priv->flowtable = flowtable;
|
||||||
|
+ flowtable->use++;
|
||||||
|
+
|
||||||
|
+ return nf_ct_netns_get(ctx->net, ctx->afi->family);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
|
||||||
|
+ const struct nft_expr *expr)
|
||||||
|
+{
|
||||||
|
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
|
||||||
|
+
|
||||||
|
+ priv->flowtable->use--;
|
||||||
|
+ nf_ct_netns_put(ctx->net, ctx->afi->family);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
|
||||||
|
+{
|
||||||
|
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
|
||||||
|
+
|
||||||
|
+ if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name))
|
||||||
|
+ goto nla_put_failure;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+nla_put_failure:
|
||||||
|
+ return -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct nft_expr_type nft_flow_offload_type;
|
||||||
|
+static const struct nft_expr_ops nft_flow_offload_ops = {
|
||||||
|
+ .type = &nft_flow_offload_type,
|
||||||
|
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
|
||||||
|
+ .eval = nft_flow_offload_eval,
|
||||||
|
+ .init = nft_flow_offload_init,
|
||||||
|
+ .destroy = nft_flow_offload_destroy,
|
||||||
|
+ .validate = nft_flow_offload_validate,
|
||||||
|
+ .dump = nft_flow_offload_dump,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static struct nft_expr_type nft_flow_offload_type __read_mostly = {
|
||||||
|
+ .name = "flow_offload",
|
||||||
|
+ .ops = &nft_flow_offload_ops,
|
||||||
|
+ .maxattr = NFTA_FLOW_MAX,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
|
||||||
|
+{
|
||||||
|
+ struct net_device *dev = data;
|
||||||
|
+
|
||||||
|
+ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ flow_offload_dead(flow);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
|
||||||
|
+ void *data)
|
||||||
|
+{
|
||||||
|
+ nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int flow_offload_netdev_event(struct notifier_block *this,
|
||||||
|
+ unsigned long event, void *ptr)
|
||||||
|
+{
|
||||||
|
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||||
|
+
|
||||||
|
+ if (event != NETDEV_DOWN)
|
||||||
|
+ return NOTIFY_DONE;
|
||||||
|
+
|
||||||
|
+ nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
|
||||||
|
+
|
||||||
|
+ return NOTIFY_DONE;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct notifier_block flow_offload_netdev_notifier = {
|
||||||
|
+ .notifier_call = flow_offload_netdev_event,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int __init nft_flow_offload_module_init(void)
|
||||||
|
+{
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ register_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
+
|
||||||
|
+ err = nft_register_expr(&nft_flow_offload_type);
|
||||||
|
+ if (err < 0)
|
||||||
|
+ goto register_expr;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+register_expr:
|
||||||
|
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __exit nft_flow_offload_module_exit(void)
|
||||||
|
+{
|
||||||
|
+ struct net *net;
|
||||||
|
+
|
||||||
|
+ nft_unregister_expr(&nft_flow_offload_type);
|
||||||
|
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
+ rtnl_lock();
|
||||||
|
+ for_each_net(net)
|
||||||
|
+ nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
|
||||||
|
+ rtnl_unlock();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+module_init(nft_flow_offload_module_init);
|
||||||
|
+module_exit(nft_flow_offload_module_exit);
|
||||||
|
+
|
||||||
|
+MODULE_LICENSE("GPL");
|
||||||
|
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
||||||
|
+MODULE_ALIAS_NFT_EXPR("flow_offload");
|
|
@ -0,0 +1,113 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Tue, 19 Dec 2017 13:53:45 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: remove nhooks field from struct
|
||||||
|
nft_af_info
|
||||||
|
|
||||||
|
We already validate the hook through bitmask, so this check is
|
||||||
|
superfluous. When removing this, this patch is also fixing a bug in the
|
||||||
|
new flowtable codebase, since ctx->afi points to the table family
|
||||||
|
instead of the netdev family which is where the flowtable is really
|
||||||
|
hooked in.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_tables.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables.h
|
||||||
|
@@ -968,7 +968,6 @@ enum nft_af_flags {
|
||||||
|
*
|
||||||
|
* @list: used internally
|
||||||
|
* @family: address family
|
||||||
|
- * @nhooks: number of hooks in this family
|
||||||
|
* @owner: module owner
|
||||||
|
* @tables: used internally
|
||||||
|
* @flags: family flags
|
||||||
|
@@ -976,7 +975,6 @@ enum nft_af_flags {
|
||||||
|
struct nft_af_info {
|
||||||
|
struct list_head list;
|
||||||
|
int family;
|
||||||
|
- unsigned int nhooks;
|
||||||
|
struct module *owner;
|
||||||
|
struct list_head tables;
|
||||||
|
u32 flags;
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -44,7 +44,6 @@ nft_do_chain_bridge(void *priv,
|
||||||
|
|
||||||
|
static struct nft_af_info nft_af_bridge __read_mostly = {
|
||||||
|
.family = NFPROTO_BRIDGE,
|
||||||
|
- .nhooks = NF_BR_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
@@ -29,7 +29,6 @@ nft_do_chain_arp(void *priv,
|
||||||
|
|
||||||
|
static struct nft_af_info nft_af_arp __read_mostly = {
|
||||||
|
.family = NFPROTO_ARP,
|
||||||
|
- .nhooks = NF_ARP_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
@@ -32,7 +32,6 @@ static unsigned int nft_do_chain_ipv4(vo
|
||||||
|
|
||||||
|
static struct nft_af_info nft_af_ipv4 __read_mostly = {
|
||||||
|
.family = NFPROTO_IPV4,
|
||||||
|
- .nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
@@ -30,7 +30,6 @@ static unsigned int nft_do_chain_ipv6(vo
|
||||||
|
|
||||||
|
static struct nft_af_info nft_af_ipv6 __read_mostly = {
|
||||||
|
.family = NFPROTO_IPV6,
|
||||||
|
- .nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -1359,9 +1359,6 @@ static int nft_chain_parse_hook(struct n
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
|
||||||
|
- if (hook->num >= afi->nhooks)
|
||||||
|
- return -EINVAL;
|
||||||
|
-
|
||||||
|
hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
|
||||||
|
|
||||||
|
type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
|
||||||
|
@@ -4987,7 +4984,7 @@ static int nf_tables_flowtable_parse_hoo
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
|
||||||
|
- if (hooknum >= ctx->afi->nhooks)
|
||||||
|
+ if (hooknum != NF_NETDEV_INGRESS)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
|
||||||
|
--- a/net/netfilter/nf_tables_inet.c
|
||||||
|
+++ b/net/netfilter/nf_tables_inet.c
|
||||||
|
@@ -40,7 +40,6 @@ static unsigned int nft_do_chain_inet(vo
|
||||||
|
|
||||||
|
static struct nft_af_info nft_af_inet __read_mostly = {
|
||||||
|
.family = NFPROTO_INET,
|
||||||
|
- .nhooks = NF_INET_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_tables_netdev.c
|
||||||
|
+++ b/net/netfilter/nf_tables_netdev.c
|
||||||
|
@@ -40,7 +40,6 @@ nft_do_chain_netdev(void *priv, struct s
|
||||||
|
|
||||||
|
static struct nft_af_info nft_af_netdev __read_mostly = {
|
||||||
|
.family = NFPROTO_NETDEV,
|
||||||
|
- .nhooks = NF_NETDEV_NUMHOOKS,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.flags = NFT_AF_NEEDS_DEV,
|
||||||
|
};
|
|
@ -0,0 +1,22 @@
|
||||||
|
From: Wei Yongjun <weiyongjun1@huawei.com>
|
||||||
|
Date: Wed, 10 Jan 2018 07:04:54 +0000
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: fix a typo in nf_tables_getflowtable()
|
||||||
|
|
||||||
|
Fix a typo, we should check 'flowtable' instead of 'table'.
|
||||||
|
|
||||||
|
Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend")
|
||||||
|
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -5411,7 +5411,7 @@ static int nf_tables_getflowtable(struct
|
||||||
|
|
||||||
|
flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
|
||||||
|
genmask);
|
||||||
|
- if (IS_ERR(table))
|
||||||
|
+ if (IS_ERR(flowtable))
|
||||||
|
return PTR_ERR(flowtable);
|
||||||
|
|
||||||
|
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
|
|
@ -0,0 +1,106 @@
|
||||||
|
From: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Date: Wed, 10 Jan 2018 18:10:59 +0100
|
||||||
|
Subject: [PATCH] netfilter: improve flow table Kconfig dependencies
|
||||||
|
|
||||||
|
The newly added NF_FLOW_TABLE options cause some build failures in
|
||||||
|
randconfig kernels:
|
||||||
|
|
||||||
|
- when CONFIG_NF_CONNTRACK is disabled, or is a loadable module but
|
||||||
|
NF_FLOW_TABLE is built-in:
|
||||||
|
|
||||||
|
In file included from net/netfilter/nf_flow_table.c:8:0:
|
||||||
|
include/net/netfilter/nf_conntrack.h:59:22: error: field 'ct_general' has incomplete type
|
||||||
|
struct nf_conntrack ct_general;
|
||||||
|
include/net/netfilter/nf_conntrack.h: In function 'nf_ct_get':
|
||||||
|
include/net/netfilter/nf_conntrack.h:148:15: error: 'const struct sk_buff' has no member named '_nfct'
|
||||||
|
include/net/netfilter/nf_conntrack.h: In function 'nf_ct_put':
|
||||||
|
include/net/netfilter/nf_conntrack.h:157:2: error: implicit declaration of function 'nf_conntrack_put'; did you mean 'nf_ct_put'? [-Werror=implicit-function-declaration]
|
||||||
|
|
||||||
|
net/netfilter/nf_flow_table.o: In function `nf_flow_offload_work_gc':
|
||||||
|
(.text+0x1540): undefined reference to `nf_ct_delete'
|
||||||
|
|
||||||
|
- when CONFIG_NF_TABLES is disabled:
|
||||||
|
|
||||||
|
In file included from net/ipv6/netfilter/nf_flow_table_ipv6.c:13:0:
|
||||||
|
include/net/netfilter/nf_tables.h: In function 'nft_gencursor_next':
|
||||||
|
include/net/netfilter/nf_tables.h:1189:14: error: 'const struct net' has no member named 'nft'; did you mean 'nf'?
|
||||||
|
|
||||||
|
- when CONFIG_NF_FLOW_TABLE_INET is enabled, but NF_FLOW_TABLE_IPV4
|
||||||
|
or NF_FLOW_TABLE_IPV6 are not, or are loadable modules
|
||||||
|
|
||||||
|
net/netfilter/nf_flow_table_inet.o: In function `nf_flow_offload_inet_hook':
|
||||||
|
nf_flow_table_inet.c:(.text+0x94): undefined reference to `nf_flow_offload_ipv6_hook'
|
||||||
|
nf_flow_table_inet.c:(.text+0x40): undefined reference to `nf_flow_offload_ip_hook'
|
||||||
|
|
||||||
|
- when CONFIG_NF_FLOW_TABLES is disabled, but the other options are
|
||||||
|
enabled:
|
||||||
|
|
||||||
|
net/netfilter/nf_flow_table_inet.o: In function `nf_flow_offload_inet_hook':
|
||||||
|
nf_flow_table_inet.c:(.text+0x6c): undefined reference to `nf_flow_offload_ipv6_hook'
|
||||||
|
net/netfilter/nf_flow_table_inet.o: In function `nf_flow_inet_module_exit':
|
||||||
|
nf_flow_table_inet.c:(.exit.text+0x8): undefined reference to `nft_unregister_flowtable_type'
|
||||||
|
net/netfilter/nf_flow_table_inet.o: In function `nf_flow_inet_module_init':
|
||||||
|
nf_flow_table_inet.c:(.init.text+0x8): undefined reference to `nft_register_flowtable_type'
|
||||||
|
net/ipv4/netfilter/nf_flow_table_ipv4.o: In function `nf_flow_ipv4_module_exit':
|
||||||
|
nf_flow_table_ipv4.c:(.exit.text+0x8): undefined reference to `nft_unregister_flowtable_type'
|
||||||
|
net/ipv4/netfilter/nf_flow_table_ipv4.o: In function `nf_flow_ipv4_module_init':
|
||||||
|
nf_flow_table_ipv4.c:(.init.text+0x8): undefined reference to `nft_register_flowtable_type'
|
||||||
|
|
||||||
|
This adds additional Kconfig dependencies to ensure that NF_CONNTRACK and NF_TABLES
|
||||||
|
are always visible from NF_FLOW_TABLE, and that the internal dependencies between
|
||||||
|
the four new modules are met.
|
||||||
|
|
||||||
|
Fixes: 7c23b629a808 ("netfilter: flow table support for the mixed IPv4/IPv6 family")
|
||||||
|
Fixes: 0995210753a2 ("netfilter: flow table support for IPv6")
|
||||||
|
Fixes: 97add9f0d66d ("netfilter: flow table support for IPv4")
|
||||||
|
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/Kconfig
|
||||||
|
+++ b/net/ipv4/netfilter/Kconfig
|
||||||
|
@@ -79,8 +79,9 @@ config NF_TABLES_ARP
|
||||||
|
endif # NF_TABLES
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE_IPV4
|
||||||
|
- select NF_FLOW_TABLE
|
||||||
|
tristate "Netfilter flow table IPv4 module"
|
||||||
|
+ depends on NF_CONNTRACK && NF_TABLES
|
||||||
|
+ select NF_FLOW_TABLE
|
||||||
|
help
|
||||||
|
This option adds the flow table IPv4 support.
|
||||||
|
|
||||||
|
--- a/net/ipv6/netfilter/Kconfig
|
||||||
|
+++ b/net/ipv6/netfilter/Kconfig
|
||||||
|
@@ -100,8 +100,9 @@ endif # NF_TABLES_IPV6
|
||||||
|
endif # NF_TABLES
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE_IPV6
|
||||||
|
- select NF_FLOW_TABLE
|
||||||
|
tristate "Netfilter flow table IPv6 module"
|
||||||
|
+ depends on NF_CONNTRACK && NF_TABLES
|
||||||
|
+ select NF_FLOW_TABLE
|
||||||
|
help
|
||||||
|
This option adds the flow table IPv6 support.
|
||||||
|
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -675,8 +675,9 @@ endif # NF_TABLES_NETDEV
|
||||||
|
endif # NF_TABLES
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE_INET
|
||||||
|
- select NF_FLOW_TABLE
|
||||||
|
tristate "Netfilter flow table mixed IPv4/IPv6 module"
|
||||||
|
+ depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
|
||||||
|
+ select NF_FLOW_TABLE
|
||||||
|
help
|
||||||
|
This option adds the flow table mixed IPv4/IPv6 support.
|
||||||
|
|
||||||
|
@@ -684,6 +685,7 @@ config NF_FLOW_TABLE_INET
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE
|
||||||
|
tristate "Netfilter flow table module"
|
||||||
|
+ depends on NF_CONNTRACK && NF_TABLES
|
||||||
|
help
|
||||||
|
This option adds the flow table core infrastructure.
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Tue, 19 Dec 2017 14:07:52 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: remove flag field from struct
|
||||||
|
nft_af_info
|
||||||
|
|
||||||
|
Replace it by a direct check for the netdev protocol family.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_tables.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables.h
|
||||||
|
@@ -959,10 +959,6 @@ struct nft_table {
|
||||||
|
char *name;
|
||||||
|
};
|
||||||
|
|
||||||
|
-enum nft_af_flags {
|
||||||
|
- NFT_AF_NEEDS_DEV = (1 << 0),
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
/**
|
||||||
|
* struct nft_af_info - nf_tables address family info
|
||||||
|
*
|
||||||
|
@@ -970,14 +966,12 @@ enum nft_af_flags {
|
||||||
|
* @family: address family
|
||||||
|
* @owner: module owner
|
||||||
|
* @tables: used internally
|
||||||
|
- * @flags: family flags
|
||||||
|
*/
|
||||||
|
struct nft_af_info {
|
||||||
|
struct list_head list;
|
||||||
|
int family;
|
||||||
|
struct module *owner;
|
||||||
|
struct list_head tables;
|
||||||
|
- u32 flags;
|
||||||
|
};
|
||||||
|
|
||||||
|
int nft_register_afinfo(struct net *, struct nft_af_info *);
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -1376,7 +1376,7 @@ static int nft_chain_parse_hook(struct n
|
||||||
|
hook->type = type;
|
||||||
|
|
||||||
|
hook->dev = NULL;
|
||||||
|
- if (afi->flags & NFT_AF_NEEDS_DEV) {
|
||||||
|
+ if (afi->family == NFPROTO_NETDEV) {
|
||||||
|
char ifname[IFNAMSIZ];
|
||||||
|
|
||||||
|
if (!ha[NFTA_HOOK_DEV]) {
|
||||||
|
--- a/net/netfilter/nf_tables_netdev.c
|
||||||
|
+++ b/net/netfilter/nf_tables_netdev.c
|
||||||
|
@@ -41,7 +41,6 @@ nft_do_chain_netdev(void *priv, struct s
|
||||||
|
static struct nft_af_info nft_af_netdev __read_mostly = {
|
||||||
|
.family = NFPROTO_NETDEV,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
- .flags = NFT_AF_NEEDS_DEV,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int nf_tables_netdev_init_net(struct net *net)
|
|
@ -0,0 +1,80 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Tue, 19 Dec 2017 12:17:52 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: no need for struct nft_af_info to
|
||||||
|
enable/disable table
|
||||||
|
|
||||||
|
nf_tables_table_enable() and nf_tables_table_disable() take a pointer to
|
||||||
|
struct nft_af_info that is never used, remove it.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -640,10 +640,7 @@ err:
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void _nf_tables_table_disable(struct net *net,
|
||||||
|
- const struct nft_af_info *afi,
|
||||||
|
- struct nft_table *table,
|
||||||
|
- u32 cnt)
|
||||||
|
+static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
|
||||||
|
{
|
||||||
|
struct nft_chain *chain;
|
||||||
|
u32 i = 0;
|
||||||
|
@@ -661,9 +658,7 @@ static void _nf_tables_table_disable(str
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int nf_tables_table_enable(struct net *net,
|
||||||
|
- const struct nft_af_info *afi,
|
||||||
|
- struct nft_table *table)
|
||||||
|
+static int nf_tables_table_enable(struct net *net, struct nft_table *table)
|
||||||
|
{
|
||||||
|
struct nft_chain *chain;
|
||||||
|
int err, i = 0;
|
||||||
|
@@ -683,15 +678,13 @@ static int nf_tables_table_enable(struct
|
||||||
|
return 0;
|
||||||
|
err:
|
||||||
|
if (i)
|
||||||
|
- _nf_tables_table_disable(net, afi, table, i);
|
||||||
|
+ nft_table_disable(net, table, i);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void nf_tables_table_disable(struct net *net,
|
||||||
|
- const struct nft_af_info *afi,
|
||||||
|
- struct nft_table *table)
|
||||||
|
+static void nf_tables_table_disable(struct net *net, struct nft_table *table)
|
||||||
|
{
|
||||||
|
- _nf_tables_table_disable(net, afi, table, 0);
|
||||||
|
+ nft_table_disable(net, table, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int nf_tables_updtable(struct nft_ctx *ctx)
|
||||||
|
@@ -720,7 +713,7 @@ static int nf_tables_updtable(struct nft
|
||||||
|
nft_trans_table_enable(trans) = false;
|
||||||
|
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
|
||||||
|
ctx->table->flags & NFT_TABLE_F_DORMANT) {
|
||||||
|
- ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table);
|
||||||
|
+ ret = nf_tables_table_enable(ctx->net, ctx->table);
|
||||||
|
if (ret >= 0) {
|
||||||
|
ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
|
||||||
|
nft_trans_table_enable(trans) = true;
|
||||||
|
@@ -5792,7 +5785,6 @@ static int nf_tables_commit(struct net *
|
||||||
|
if (nft_trans_table_update(trans)) {
|
||||||
|
if (!nft_trans_table_enable(trans)) {
|
||||||
|
nf_tables_table_disable(net,
|
||||||
|
- trans->ctx.afi,
|
||||||
|
trans->ctx.table);
|
||||||
|
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
|
||||||
|
}
|
||||||
|
@@ -5956,7 +5948,6 @@ static int nf_tables_abort(struct net *n
|
||||||
|
if (nft_trans_table_update(trans)) {
|
||||||
|
if (nft_trans_table_enable(trans)) {
|
||||||
|
nf_tables_table_disable(net,
|
||||||
|
- trans->ctx.afi,
|
||||||
|
trans->ctx.table);
|
||||||
|
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Tue, 19 Dec 2017 13:40:22 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: remove struct nft_af_info parameter in
|
||||||
|
nf_tables_chain_type_lookup()
|
||||||
|
|
||||||
|
Pass family number instead, this comes in preparation for the removal of
|
||||||
|
struct nft_af_info.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -452,7 +452,7 @@ static inline u64 nf_tables_alloc_handle
|
||||||
|
static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
|
||||||
|
|
||||||
|
static const struct nf_chain_type *
|
||||||
|
-__nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
|
||||||
|
+__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
@@ -465,22 +465,20 @@ __nf_tables_chain_type_lookup(int family
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct nf_chain_type *
|
||||||
|
-nf_tables_chain_type_lookup(const struct nft_af_info *afi,
|
||||||
|
- const struct nlattr *nla,
|
||||||
|
- bool autoload)
|
||||||
|
+nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
|
||||||
|
{
|
||||||
|
const struct nf_chain_type *type;
|
||||||
|
|
||||||
|
- type = __nf_tables_chain_type_lookup(afi->family, nla);
|
||||||
|
+ type = __nf_tables_chain_type_lookup(nla, family);
|
||||||
|
if (type != NULL)
|
||||||
|
return type;
|
||||||
|
#ifdef CONFIG_MODULES
|
||||||
|
if (autoload) {
|
||||||
|
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
- request_module("nft-chain-%u-%.*s", afi->family,
|
||||||
|
+ request_module("nft-chain-%u-%.*s", family,
|
||||||
|
nla_len(nla), (const char *)nla_data(nla));
|
||||||
|
nfnl_lock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
- type = __nf_tables_chain_type_lookup(afi->family, nla);
|
||||||
|
+ type = __nf_tables_chain_type_lookup(nla, family);
|
||||||
|
if (type != NULL)
|
||||||
|
return ERR_PTR(-EAGAIN);
|
||||||
|
}
|
||||||
|
@@ -1356,8 +1354,8 @@ static int nft_chain_parse_hook(struct n
|
||||||
|
|
||||||
|
type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
|
||||||
|
if (nla[NFTA_CHAIN_TYPE]) {
|
||||||
|
- type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
|
||||||
|
- create);
|
||||||
|
+ type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
|
||||||
|
+ afi->family, create);
|
||||||
|
if (IS_ERR(type))
|
||||||
|
return PTR_ERR(type);
|
||||||
|
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
From: Hangbin Liu <liuhangbin@gmail.com>
|
||||||
|
Date: Mon, 25 Dec 2017 11:34:54 +0800
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: fix potential NULL-ptr deref in
|
||||||
|
nf_tables_dump_obj_done()
|
||||||
|
|
||||||
|
If there is no NFTA_OBJ_TABLE and NFTA_OBJ_TYPE, the c.data will be NULL in
|
||||||
|
nf_tables_getobj(). So before freeing filter->table in nf_tables_dump_obj_done(),
|
||||||
|
we need to check if filter is NULL first.
|
||||||
|
|
||||||
|
Fixes: e46abbcc05aa ("netfilter: nf_tables: Allow table names of up to 255 chars")
|
||||||
|
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
|
||||||
|
Acked-by: Phil Sutter <phil@nwl.cc>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -5330,8 +5330,10 @@ static int nf_tables_dump_flowtable_done
|
||||||
|
if (!filter)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
- kfree(filter->table);
|
||||||
|
- kfree(filter);
|
||||||
|
+ if (filter) {
|
||||||
|
+ kfree(filter->table);
|
||||||
|
+ kfree(filter);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,100 @@
|
||||||
|
From: Vasily Averin <vvs@virtuozzo.com>
|
||||||
|
Date: Sun, 12 Nov 2017 14:32:37 +0300
|
||||||
|
Subject: [PATCH] netfilter: exit_net cleanup check added
|
||||||
|
|
||||||
|
Be sure that lists initialized in the net_init hook were returned to initial
|
||||||
|
state.
|
||||||
|
|
||||||
|
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
|
||||||
|
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
|
||||||
|
@@ -834,6 +834,7 @@ static void clusterip_net_exit(struct ne
|
||||||
|
cn->procdir = NULL;
|
||||||
|
#endif
|
||||||
|
nf_unregister_net_hook(net, &cip_arp_ops);
|
||||||
|
+ WARN_ON_ONCE(!list_empty(&cn->configs));
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct pernet_operations clusterip_net_ops = {
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -6545,6 +6545,12 @@ static int __net_init nf_tables_init_net
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void __net_exit nf_tables_exit_net(struct net *net)
|
||||||
|
+{
|
||||||
|
+ WARN_ON_ONCE(!list_empty(&net->nft.af_info));
|
||||||
|
+ WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int __nft_release_basechain(struct nft_ctx *ctx)
|
||||||
|
{
|
||||||
|
struct nft_rule *rule, *nr;
|
||||||
|
@@ -6622,6 +6628,7 @@ static void __nft_release_afinfo(struct
|
||||||
|
|
||||||
|
static struct pernet_operations nf_tables_net_ops = {
|
||||||
|
.init = nf_tables_init_net,
|
||||||
|
+ .exit = nf_tables_exit_net,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init nf_tables_module_init(void)
|
||||||
|
--- a/net/netfilter/nfnetlink_log.c
|
||||||
|
+++ b/net/netfilter/nfnetlink_log.c
|
||||||
|
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(
|
||||||
|
|
||||||
|
static void __net_exit nfnl_log_net_exit(struct net *net)
|
||||||
|
{
|
||||||
|
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
|
||||||
|
+ unsigned int i;
|
||||||
|
+
|
||||||
|
#ifdef CONFIG_PROC_FS
|
||||||
|
remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
|
||||||
|
#endif
|
||||||
|
nf_log_unset(net, &nfulnl_logger);
|
||||||
|
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
|
||||||
|
+ WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct pernet_operations nfnl_log_net_ops = {
|
||||||
|
--- a/net/netfilter/nfnetlink_queue.c
|
||||||
|
+++ b/net/netfilter/nfnetlink_queue.c
|
||||||
|
@@ -1510,10 +1510,15 @@ static int __net_init nfnl_queue_net_ini
|
||||||
|
|
||||||
|
static void __net_exit nfnl_queue_net_exit(struct net *net)
|
||||||
|
{
|
||||||
|
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
||||||
|
+ unsigned int i;
|
||||||
|
+
|
||||||
|
nf_unregister_queue_handler(net);
|
||||||
|
#ifdef CONFIG_PROC_FS
|
||||||
|
remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
|
||||||
|
#endif
|
||||||
|
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
|
||||||
|
+ WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
|
||||||
|
--- a/net/netfilter/x_tables.c
|
||||||
|
+++ b/net/netfilter/x_tables.c
|
||||||
|
@@ -1785,8 +1785,17 @@ static int __net_init xt_net_init(struct
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void __net_exit xt_net_exit(struct net *net)
|
||||||
|
+{
|
||||||
|
+ int i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
|
||||||
|
+ WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static struct pernet_operations xt_net_ops = {
|
||||||
|
.init = xt_net_init,
|
||||||
|
+ .exit = xt_net_exit,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init xt_init(void)
|
|
@ -0,0 +1,598 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Tue, 9 Jan 2018 02:42:11 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: get rid of pernet families
|
||||||
|
|
||||||
|
Now that we have a single table list for each netns, we can get rid of
|
||||||
|
one pointer per family and the global afinfo list, thus, shrinking
|
||||||
|
struct netns for nftables that now becomes 64 bytes smaller.
|
||||||
|
|
||||||
|
And call __nft_release_afinfo() from __net_exit path accordingly to
|
||||||
|
release netnamespace objects on removal.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_tables.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables.h
|
||||||
|
@@ -974,8 +974,8 @@ struct nft_af_info {
|
||||||
|
struct module *owner;
|
||||||
|
};
|
||||||
|
|
||||||
|
-int nft_register_afinfo(struct net *, struct nft_af_info *);
|
||||||
|
-void nft_unregister_afinfo(struct net *, struct nft_af_info *);
|
||||||
|
+int nft_register_afinfo(struct nft_af_info *);
|
||||||
|
+void nft_unregister_afinfo(struct nft_af_info *);
|
||||||
|
|
||||||
|
int nft_register_chain_type(const struct nf_chain_type *);
|
||||||
|
void nft_unregister_chain_type(const struct nf_chain_type *);
|
||||||
|
--- a/include/net/netns/nftables.h
|
||||||
|
+++ b/include/net/netns/nftables.h
|
||||||
|
@@ -7,15 +7,8 @@
|
||||||
|
struct nft_af_info;
|
||||||
|
|
||||||
|
struct netns_nftables {
|
||||||
|
- struct list_head af_info;
|
||||||
|
struct list_head tables;
|
||||||
|
struct list_head commit_list;
|
||||||
|
- struct nft_af_info *ipv4;
|
||||||
|
- struct nft_af_info *ipv6;
|
||||||
|
- struct nft_af_info *inet;
|
||||||
|
- struct nft_af_info *arp;
|
||||||
|
- struct nft_af_info *bridge;
|
||||||
|
- struct nft_af_info *netdev;
|
||||||
|
unsigned int base_seq;
|
||||||
|
u8 gencursor;
|
||||||
|
};
|
||||||
|
--- a/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
+++ b/net/bridge/netfilter/nf_tables_bridge.c
|
||||||
|
@@ -47,34 +47,6 @@ static struct nft_af_info nft_af_bridge
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int nf_tables_bridge_init_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
|
||||||
|
- if (net->nft.bridge == NULL)
|
||||||
|
- return -ENOMEM;
|
||||||
|
-
|
||||||
|
- memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge));
|
||||||
|
-
|
||||||
|
- if (nft_register_afinfo(net, net->nft.bridge) < 0)
|
||||||
|
- goto err;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-err:
|
||||||
|
- kfree(net->nft.bridge);
|
||||||
|
- return -ENOMEM;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void nf_tables_bridge_exit_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- nft_unregister_afinfo(net, net->nft.bridge);
|
||||||
|
- kfree(net->nft.bridge);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static struct pernet_operations nf_tables_bridge_net_ops = {
|
||||||
|
- .init = nf_tables_bridge_init_net,
|
||||||
|
- .exit = nf_tables_bridge_exit_net,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
static const struct nf_chain_type filter_bridge = {
|
||||||
|
.name = "filter",
|
||||||
|
.type = NFT_CHAIN_T_DEFAULT,
|
||||||
|
@@ -98,17 +70,17 @@ static int __init nf_tables_bridge_init(
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- ret = nft_register_chain_type(&filter_bridge);
|
||||||
|
+ ret = nft_register_afinfo(&nft_af_bridge);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
- ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
|
||||||
|
+ ret = nft_register_chain_type(&filter_bridge);
|
||||||
|
if (ret < 0)
|
||||||
|
- goto err_register_subsys;
|
||||||
|
+ goto err_register_chain;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
-err_register_subsys:
|
||||||
|
+err_register_chain:
|
||||||
|
nft_unregister_chain_type(&filter_bridge);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
@@ -116,8 +88,8 @@ err_register_subsys:
|
||||||
|
|
||||||
|
static void __exit nf_tables_bridge_exit(void)
|
||||||
|
{
|
||||||
|
- unregister_pernet_subsys(&nf_tables_bridge_net_ops);
|
||||||
|
nft_unregister_chain_type(&filter_bridge);
|
||||||
|
+ nft_unregister_afinfo(&nft_af_bridge);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(nf_tables_bridge_init);
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_arp.c
|
||||||
|
@@ -32,34 +32,6 @@ static struct nft_af_info nft_af_arp __r
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int nf_tables_arp_init_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
|
||||||
|
- if (net->nft.arp== NULL)
|
||||||
|
- return -ENOMEM;
|
||||||
|
-
|
||||||
|
- memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
|
||||||
|
-
|
||||||
|
- if (nft_register_afinfo(net, net->nft.arp) < 0)
|
||||||
|
- goto err;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-err:
|
||||||
|
- kfree(net->nft.arp);
|
||||||
|
- return -ENOMEM;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void nf_tables_arp_exit_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- nft_unregister_afinfo(net, net->nft.arp);
|
||||||
|
- kfree(net->nft.arp);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static struct pernet_operations nf_tables_arp_net_ops = {
|
||||||
|
- .init = nf_tables_arp_init_net,
|
||||||
|
- .exit = nf_tables_arp_exit_net,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
static const struct nf_chain_type filter_arp = {
|
||||||
|
.name = "filter",
|
||||||
|
.type = NFT_CHAIN_T_DEFAULT,
|
||||||
|
@@ -77,21 +49,26 @@ static int __init nf_tables_arp_init(voi
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- ret = nft_register_chain_type(&filter_arp);
|
||||||
|
+ ret = nft_register_afinfo(&nft_af_arp);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
- ret = register_pernet_subsys(&nf_tables_arp_net_ops);
|
||||||
|
+ ret = nft_register_chain_type(&filter_arp);
|
||||||
|
if (ret < 0)
|
||||||
|
- nft_unregister_chain_type(&filter_arp);
|
||||||
|
+ goto err_register_chain;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+err_register_chain:
|
||||||
|
+ nft_unregister_chain_type(&filter_arp);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit nf_tables_arp_exit(void)
|
||||||
|
{
|
||||||
|
- unregister_pernet_subsys(&nf_tables_arp_net_ops);
|
||||||
|
nft_unregister_chain_type(&filter_arp);
|
||||||
|
+ nft_unregister_afinfo(&nft_af_arp);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(nf_tables_arp_init);
|
||||||
|
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
|
||||||
|
@@ -35,34 +35,6 @@ static struct nft_af_info nft_af_ipv4 __
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int nf_tables_ipv4_init_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
|
||||||
|
- if (net->nft.ipv4 == NULL)
|
||||||
|
- return -ENOMEM;
|
||||||
|
-
|
||||||
|
- memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
|
||||||
|
-
|
||||||
|
- if (nft_register_afinfo(net, net->nft.ipv4) < 0)
|
||||||
|
- goto err;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-err:
|
||||||
|
- kfree(net->nft.ipv4);
|
||||||
|
- return -ENOMEM;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void nf_tables_ipv4_exit_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- nft_unregister_afinfo(net, net->nft.ipv4);
|
||||||
|
- kfree(net->nft.ipv4);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static struct pernet_operations nf_tables_ipv4_net_ops = {
|
||||||
|
- .init = nf_tables_ipv4_init_net,
|
||||||
|
- .exit = nf_tables_ipv4_exit_net,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
static const struct nf_chain_type filter_ipv4 = {
|
||||||
|
.name = "filter",
|
||||||
|
.type = NFT_CHAIN_T_DEFAULT,
|
||||||
|
@@ -86,21 +58,25 @@ static int __init nf_tables_ipv4_init(vo
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- ret = nft_register_chain_type(&filter_ipv4);
|
||||||
|
+ ret = nft_register_afinfo(&nft_af_ipv4);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
- ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
|
||||||
|
+ ret = nft_register_chain_type(&filter_ipv4);
|
||||||
|
if (ret < 0)
|
||||||
|
- nft_unregister_chain_type(&filter_ipv4);
|
||||||
|
+ goto err_register_chain;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
|
||||||
|
+err_register_chain:
|
||||||
|
+ nft_unregister_afinfo(&nft_af_ipv4);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit nf_tables_ipv4_exit(void)
|
||||||
|
{
|
||||||
|
- unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
|
||||||
|
nft_unregister_chain_type(&filter_ipv4);
|
||||||
|
+ nft_unregister_afinfo(&nft_af_ipv4);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(nf_tables_ipv4_init);
|
||||||
|
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
|
||||||
|
@@ -33,34 +33,6 @@ static struct nft_af_info nft_af_ipv6 __
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int nf_tables_ipv6_init_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
|
||||||
|
- if (net->nft.ipv6 == NULL)
|
||||||
|
- return -ENOMEM;
|
||||||
|
-
|
||||||
|
- memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
|
||||||
|
-
|
||||||
|
- if (nft_register_afinfo(net, net->nft.ipv6) < 0)
|
||||||
|
- goto err;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-err:
|
||||||
|
- kfree(net->nft.ipv6);
|
||||||
|
- return -ENOMEM;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void nf_tables_ipv6_exit_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- nft_unregister_afinfo(net, net->nft.ipv6);
|
||||||
|
- kfree(net->nft.ipv6);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static struct pernet_operations nf_tables_ipv6_net_ops = {
|
||||||
|
- .init = nf_tables_ipv6_init_net,
|
||||||
|
- .exit = nf_tables_ipv6_exit_net,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
static const struct nf_chain_type filter_ipv6 = {
|
||||||
|
.name = "filter",
|
||||||
|
.type = NFT_CHAIN_T_DEFAULT,
|
||||||
|
@@ -84,20 +56,24 @@ static int __init nf_tables_ipv6_init(vo
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- ret = nft_register_chain_type(&filter_ipv6);
|
||||||
|
+ ret = nft_register_afinfo(&nft_af_ipv6);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
- ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
|
||||||
|
+ ret = nft_register_chain_type(&filter_ipv6);
|
||||||
|
if (ret < 0)
|
||||||
|
- nft_unregister_chain_type(&filter_ipv6);
|
||||||
|
+ goto err_register_chain;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
|
||||||
|
+err_register_chain:
|
||||||
|
+ nft_unregister_afinfo(&nft_af_ipv6);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit nf_tables_ipv6_exit(void)
|
||||||
|
{
|
||||||
|
- unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
|
||||||
|
+ nft_unregister_afinfo(&nft_af_ipv6);
|
||||||
|
nft_unregister_chain_type(&filter_ipv6);
|
||||||
|
}
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -26,6 +26,7 @@
|
||||||
|
static LIST_HEAD(nf_tables_expressions);
|
||||||
|
static LIST_HEAD(nf_tables_objects);
|
||||||
|
static LIST_HEAD(nf_tables_flowtables);
|
||||||
|
+static LIST_HEAD(nf_tables_af_info);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* nft_register_afinfo - register nf_tables address family info
|
||||||
|
@@ -35,17 +36,15 @@ static LIST_HEAD(nf_tables_flowtables);
|
||||||
|
* Register the address family for use with nf_tables. Returns zero on
|
||||||
|
* success or a negative errno code otherwise.
|
||||||
|
*/
|
||||||
|
-int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
|
||||||
|
+int nft_register_afinfo(struct nft_af_info *afi)
|
||||||
|
{
|
||||||
|
nfnl_lock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
- list_add_tail_rcu(&afi->list, &net->nft.af_info);
|
||||||
|
+ list_add_tail_rcu(&afi->list, &nf_tables_af_info);
|
||||||
|
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nft_register_afinfo);
|
||||||
|
|
||||||
|
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi);
|
||||||
|
-
|
||||||
|
/**
|
||||||
|
* nft_unregister_afinfo - unregister nf_tables address family info
|
||||||
|
*
|
||||||
|
@@ -53,10 +52,9 @@ static void __nft_release_afinfo(struct
|
||||||
|
*
|
||||||
|
* Unregister the address family for use with nf_tables.
|
||||||
|
*/
|
||||||
|
-void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi)
|
||||||
|
+void nft_unregister_afinfo(struct nft_af_info *afi)
|
||||||
|
{
|
||||||
|
nfnl_lock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
- __nft_release_afinfo(net, afi);
|
||||||
|
list_del_rcu(&afi->list);
|
||||||
|
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
}
|
||||||
|
@@ -66,7 +64,7 @@ static struct nft_af_info *nft_afinfo_lo
|
||||||
|
{
|
||||||
|
struct nft_af_info *afi;
|
||||||
|
|
||||||
|
- list_for_each_entry(afi, &net->nft.af_info, list) {
|
||||||
|
+ list_for_each_entry(afi, &nf_tables_af_info, list) {
|
||||||
|
if (afi->family == family)
|
||||||
|
return afi;
|
||||||
|
}
|
||||||
|
@@ -5036,15 +5034,12 @@ void nft_flow_table_iterate(struct net *
|
||||||
|
void *data)
|
||||||
|
{
|
||||||
|
struct nft_flowtable *flowtable;
|
||||||
|
- const struct nft_af_info *afi;
|
||||||
|
const struct nft_table *table;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
|
||||||
|
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
|
||||||
|
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
|
||||||
|
- iter(&flowtable->data, data);
|
||||||
|
- }
|
||||||
|
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
|
||||||
|
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
|
||||||
|
+ iter(&flowtable->data, data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
@@ -6536,21 +6531,6 @@ int nft_data_dump(struct sk_buff *skb, i
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nft_data_dump);
|
||||||
|
|
||||||
|
-static int __net_init nf_tables_init_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- INIT_LIST_HEAD(&net->nft.af_info);
|
||||||
|
- INIT_LIST_HEAD(&net->nft.tables);
|
||||||
|
- INIT_LIST_HEAD(&net->nft.commit_list);
|
||||||
|
- net->nft.base_seq = 1;
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void __net_exit nf_tables_exit_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- WARN_ON_ONCE(!list_empty(&net->nft.af_info));
|
||||||
|
- WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
int __nft_release_basechain(struct nft_ctx *ctx)
|
||||||
|
{
|
||||||
|
struct nft_rule *rule, *nr;
|
||||||
|
@@ -6571,8 +6551,7 @@ int __nft_release_basechain(struct nft_c
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(__nft_release_basechain);
|
||||||
|
|
||||||
|
-/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
|
||||||
|
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
|
||||||
|
+static void __nft_release_afinfo(struct net *net)
|
||||||
|
{
|
||||||
|
struct nft_flowtable *flowtable, *nf;
|
||||||
|
struct nft_table *table, *nt;
|
||||||
|
@@ -6582,10 +6561,11 @@ static void __nft_release_afinfo(struct
|
||||||
|
struct nft_set *set, *ns;
|
||||||
|
struct nft_ctx ctx = {
|
||||||
|
.net = net,
|
||||||
|
- .family = afi->family,
|
||||||
|
};
|
||||||
|
|
||||||
|
list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
|
||||||
|
+ ctx.family = table->afi->family;
|
||||||
|
+
|
||||||
|
list_for_each_entry(chain, &table->chains, list)
|
||||||
|
nf_tables_unregister_hook(net, table, chain);
|
||||||
|
list_for_each_entry(flowtable, &table->flowtables, list)
|
||||||
|
@@ -6626,6 +6606,21 @@ static void __nft_release_afinfo(struct
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int __net_init nf_tables_init_net(struct net *net)
|
||||||
|
+{
|
||||||
|
+ INIT_LIST_HEAD(&net->nft.tables);
|
||||||
|
+ INIT_LIST_HEAD(&net->nft.commit_list);
|
||||||
|
+ net->nft.base_seq = 1;
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __net_exit nf_tables_exit_net(struct net *net)
|
||||||
|
+{
|
||||||
|
+ __nft_release_afinfo(net);
|
||||||
|
+ WARN_ON_ONCE(!list_empty(&net->nft.tables));
|
||||||
|
+ WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static struct pernet_operations nf_tables_net_ops = {
|
||||||
|
.init = nf_tables_init_net,
|
||||||
|
.exit = nf_tables_exit_net,
|
||||||
|
--- a/net/netfilter/nf_tables_inet.c
|
||||||
|
+++ b/net/netfilter/nf_tables_inet.c
|
||||||
|
@@ -43,34 +43,6 @@ static struct nft_af_info nft_af_inet __
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int __net_init nf_tables_inet_init_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
|
||||||
|
- if (net->nft.inet == NULL)
|
||||||
|
- return -ENOMEM;
|
||||||
|
- memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet));
|
||||||
|
-
|
||||||
|
- if (nft_register_afinfo(net, net->nft.inet) < 0)
|
||||||
|
- goto err;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-
|
||||||
|
-err:
|
||||||
|
- kfree(net->nft.inet);
|
||||||
|
- return -ENOMEM;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void __net_exit nf_tables_inet_exit_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- nft_unregister_afinfo(net, net->nft.inet);
|
||||||
|
- kfree(net->nft.inet);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static struct pernet_operations nf_tables_inet_net_ops = {
|
||||||
|
- .init = nf_tables_inet_init_net,
|
||||||
|
- .exit = nf_tables_inet_exit_net,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
static const struct nf_chain_type filter_inet = {
|
||||||
|
.name = "filter",
|
||||||
|
.type = NFT_CHAIN_T_DEFAULT,
|
||||||
|
@@ -94,21 +66,24 @@ static int __init nf_tables_inet_init(vo
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- ret = nft_register_chain_type(&filter_inet);
|
||||||
|
- if (ret < 0)
|
||||||
|
+ if (nft_register_afinfo(&nft_af_inet) < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
- ret = register_pernet_subsys(&nf_tables_inet_net_ops);
|
||||||
|
+ ret = nft_register_chain_type(&filter_inet);
|
||||||
|
if (ret < 0)
|
||||||
|
- nft_unregister_chain_type(&filter_inet);
|
||||||
|
+ goto err_register_chain;
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
|
||||||
|
+err_register_chain:
|
||||||
|
+ nft_unregister_afinfo(&nft_af_inet);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit nf_tables_inet_exit(void)
|
||||||
|
{
|
||||||
|
- unregister_pernet_subsys(&nf_tables_inet_net_ops);
|
||||||
|
nft_unregister_chain_type(&filter_inet);
|
||||||
|
+ nft_unregister_afinfo(&nft_af_inet);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(nf_tables_inet_init);
|
||||||
|
--- a/net/netfilter/nf_tables_netdev.c
|
||||||
|
+++ b/net/netfilter/nf_tables_netdev.c
|
||||||
|
@@ -43,34 +43,6 @@ static struct nft_af_info nft_af_netdev
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int nf_tables_netdev_init_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
|
||||||
|
- if (net->nft.netdev == NULL)
|
||||||
|
- return -ENOMEM;
|
||||||
|
-
|
||||||
|
- memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev));
|
||||||
|
-
|
||||||
|
- if (nft_register_afinfo(net, net->nft.netdev) < 0)
|
||||||
|
- goto err;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-err:
|
||||||
|
- kfree(net->nft.netdev);
|
||||||
|
- return -ENOMEM;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void nf_tables_netdev_exit_net(struct net *net)
|
||||||
|
-{
|
||||||
|
- nft_unregister_afinfo(net, net->nft.netdev);
|
||||||
|
- kfree(net->nft.netdev);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static struct pernet_operations nf_tables_netdev_net_ops = {
|
||||||
|
- .init = nf_tables_netdev_init_net,
|
||||||
|
- .exit = nf_tables_netdev_exit_net,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
static const struct nf_chain_type nft_filter_chain_netdev = {
|
||||||
|
.name = "filter",
|
||||||
|
.type = NFT_CHAIN_T_DEFAULT,
|
||||||
|
@@ -145,32 +117,32 @@ static int __init nf_tables_netdev_init(
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- ret = nft_register_chain_type(&nft_filter_chain_netdev);
|
||||||
|
- if (ret)
|
||||||
|
+ if (nft_register_afinfo(&nft_af_netdev) < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
- ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
|
||||||
|
+ ret = nft_register_chain_type(&nft_filter_chain_netdev);
|
||||||
|
if (ret)
|
||||||
|
- goto err1;
|
||||||
|
+ goto err_register_chain_type;
|
||||||
|
|
||||||
|
ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
|
||||||
|
if (ret)
|
||||||
|
- goto err2;
|
||||||
|
+ goto err_register_netdevice_notifier;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
-err2:
|
||||||
|
- unregister_pernet_subsys(&nf_tables_netdev_net_ops);
|
||||||
|
-err1:
|
||||||
|
+err_register_netdevice_notifier:
|
||||||
|
nft_unregister_chain_type(&nft_filter_chain_netdev);
|
||||||
|
+err_register_chain_type:
|
||||||
|
+ nft_unregister_afinfo(&nft_af_netdev);
|
||||||
|
+
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit nf_tables_netdev_exit(void)
|
||||||
|
{
|
||||||
|
unregister_netdevice_notifier(&nf_tables_netdev_notifier);
|
||||||
|
- unregister_pernet_subsys(&nf_tables_netdev_net_ops);
|
||||||
|
nft_unregister_chain_type(&nft_filter_chain_netdev);
|
||||||
|
+ nft_unregister_afinfo(&nft_af_netdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(nf_tables_netdev_init);
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,47 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Thu, 1 Feb 2018 18:49:00 +0100
|
||||||
|
Subject: [PATCH] netfilter: nft_flow_offload: wait for garbage collector
|
||||||
|
to run after cleanup
|
||||||
|
|
||||||
|
If netdevice goes down, then flowtable entries are scheduled to be
|
||||||
|
removed. Wait for garbage collector to have a chance to run so it can
|
||||||
|
delete them from the hashtable.
|
||||||
|
|
||||||
|
The flush call might sleep, so hold the nfnl mutex from
|
||||||
|
nft_flow_table_iterate() instead of rcu read side lock. The use of the
|
||||||
|
nfnl mutex is also implicitly fixing races between updates via nfnetlink
|
||||||
|
and netdevice event.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -4886,13 +4886,13 @@ void nft_flow_table_iterate(struct net *
|
||||||
|
struct nft_flowtable *flowtable;
|
||||||
|
const struct nft_table *table;
|
||||||
|
|
||||||
|
- rcu_read_lock();
|
||||||
|
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
|
||||||
|
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
|
||||||
|
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
+ list_for_each_entry(table, &net->nft.tables, list) {
|
||||||
|
+ list_for_each_entry(flowtable, &table->flowtables, list) {
|
||||||
|
iter(&flowtable->data, data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- rcu_read_unlock();
|
||||||
|
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
|
||||||
|
|
||||||
|
--- a/net/netfilter/nft_flow_offload.c
|
||||||
|
+++ b/net/netfilter/nft_flow_offload.c
|
||||||
|
@@ -208,6 +208,7 @@ static void nft_flow_offload_iterate_cle
|
||||||
|
void *data)
|
||||||
|
{
|
||||||
|
nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
|
||||||
|
+ flush_delayed_work(&flowtable->gc_work);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int flow_offload_netdev_event(struct notifier_block *this,
|
|
@ -0,0 +1,29 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Thu, 1 Feb 2018 18:49:01 +0100
|
||||||
|
Subject: [PATCH] netfilter: nft_flow_offload: no need to flush entries on
|
||||||
|
module removal
|
||||||
|
|
||||||
|
nft_flow_offload module removal does not require flushing existing
|
||||||
|
flowtables, it is valid to remove this module while keeping flowtables
|
||||||
|
around.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nft_flow_offload.c
|
||||||
|
+++ b/net/netfilter/nft_flow_offload.c
|
||||||
|
@@ -247,14 +247,8 @@ register_expr:
|
||||||
|
|
||||||
|
static void __exit nft_flow_offload_module_exit(void)
|
||||||
|
{
|
||||||
|
- struct net *net;
|
||||||
|
-
|
||||||
|
nft_unregister_expr(&nft_flow_offload_type);
|
||||||
|
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
- rtnl_lock();
|
||||||
|
- for_each_net(net)
|
||||||
|
- nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
|
||||||
|
- rtnl_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(nft_flow_offload_module_init);
|
|
@ -0,0 +1,97 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Tue, 23 Jan 2018 17:46:09 +0100
|
||||||
|
Subject: [PATCH] netfilter: nft_flow_offload: move flowtable cleanup
|
||||||
|
routines to nf_flow_table
|
||||||
|
|
||||||
|
Move the flowtable cleanup routines to nf_flow_table and expose the
|
||||||
|
nf_flow_table_cleanup() helper function.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -95,6 +95,9 @@ struct flow_offload_tuple_rhash *flow_of
|
||||||
|
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||||
|
void (*iter)(struct flow_offload *flow, void *data),
|
||||||
|
void *data);
|
||||||
|
+
|
||||||
|
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
|
||||||
|
+
|
||||||
|
void nf_flow_offload_work_gc(struct work_struct *work);
|
||||||
|
extern const struct rhashtable_params nf_flow_offload_rhash_params;
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table.c
|
||||||
|
@@ -4,6 +4,7 @@
|
||||||
|
#include <linux/netfilter.h>
|
||||||
|
#include <linux/rhashtable.h>
|
||||||
|
#include <linux/netdevice.h>
|
||||||
|
+#include <net/netfilter/nf_tables.h>
|
||||||
|
#include <net/netfilter/nf_flow_table.h>
|
||||||
|
#include <net/netfilter/nf_conntrack.h>
|
||||||
|
#include <net/netfilter/nf_conntrack_core.h>
|
||||||
|
@@ -425,5 +426,28 @@ int nf_flow_dnat_port(const struct flow_
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
|
||||||
|
|
||||||
|
+static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
|
||||||
|
+{
|
||||||
|
+ struct net_device *dev = data;
|
||||||
|
+
|
||||||
|
+ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ flow_offload_dead(flow);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
|
||||||
|
+ void *data)
|
||||||
|
+{
|
||||||
|
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
|
||||||
|
+ flush_delayed_work(&flowtable->gc_work);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
|
||||||
|
+{
|
||||||
|
+ nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
|
||||||
|
+
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
||||||
|
--- a/net/netfilter/nft_flow_offload.c
|
||||||
|
+++ b/net/netfilter/nft_flow_offload.c
|
||||||
|
@@ -194,23 +194,6 @@ static struct nft_expr_type nft_flow_off
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
|
||||||
|
-{
|
||||||
|
- struct net_device *dev = data;
|
||||||
|
-
|
||||||
|
- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
|
||||||
|
- return;
|
||||||
|
-
|
||||||
|
- flow_offload_dead(flow);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
|
||||||
|
- void *data)
|
||||||
|
-{
|
||||||
|
- nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
|
||||||
|
- flush_delayed_work(&flowtable->gc_work);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static int flow_offload_netdev_event(struct notifier_block *this,
|
||||||
|
unsigned long event, void *ptr)
|
||||||
|
{
|
||||||
|
@@ -219,7 +202,7 @@ static int flow_offload_netdev_event(str
|
||||||
|
if (event != NETDEV_DOWN)
|
||||||
|
return NOTIFY_DONE;
|
||||||
|
|
||||||
|
- nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
|
||||||
|
+ nf_flow_table_cleanup(dev_net(dev), dev);
|
||||||
|
|
||||||
|
return NOTIFY_DONE;
|
||||||
|
}
|
|
@ -0,0 +1,140 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Mon, 5 Feb 2018 21:44:50 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: fix flowtable free
|
||||||
|
|
||||||
|
Every flow_offload entry is added into the table twice. Because of this,
|
||||||
|
rhashtable_free_and_destroy can't be used, since it would call kfree for
|
||||||
|
each flow_offload object twice.
|
||||||
|
|
||||||
|
This patch adds a call to nf_flow_table_iterate_cleanup() to schedule
|
||||||
|
removal of entries, then there is an explicit invocation of the
|
||||||
|
garbage collector to clean up resources.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -14,6 +14,7 @@ struct nf_flowtable_type {
|
||||||
|
struct list_head list;
|
||||||
|
int family;
|
||||||
|
void (*gc)(struct work_struct *work);
|
||||||
|
+ void (*free)(struct nf_flowtable *ft);
|
||||||
|
const struct rhashtable_params *params;
|
||||||
|
nf_hookfn *hook;
|
||||||
|
struct module *owner;
|
||||||
|
@@ -98,6 +99,7 @@ int nf_flow_table_iterate(struct nf_flow
|
||||||
|
|
||||||
|
void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
|
||||||
|
|
||||||
|
+void nf_flow_table_free(struct nf_flowtable *flow_table);
|
||||||
|
void nf_flow_offload_work_gc(struct work_struct *work);
|
||||||
|
extern const struct rhashtable_params nf_flow_offload_rhash_params;
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
@@ -260,6 +260,7 @@ static struct nf_flowtable_type flowtabl
|
||||||
|
.family = NFPROTO_IPV4,
|
||||||
|
.params = &nf_flow_offload_rhash_params,
|
||||||
|
.gc = nf_flow_offload_work_gc,
|
||||||
|
+ .free = nf_flow_table_free,
|
||||||
|
.hook = nf_flow_offload_ip_hook,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
@@ -254,6 +254,7 @@ static struct nf_flowtable_type flowtabl
|
||||||
|
.family = NFPROTO_IPV6,
|
||||||
|
.params = &nf_flow_offload_rhash_params,
|
||||||
|
.gc = nf_flow_offload_work_gc,
|
||||||
|
+ .free = nf_flow_table_free,
|
||||||
|
.hook = nf_flow_offload_ipv6_hook,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
--- a/net/netfilter/nf_flow_table.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table.c
|
||||||
|
@@ -232,19 +232,16 @@ static inline bool nf_flow_is_dying(cons
|
||||||
|
return flow->flags & FLOW_OFFLOAD_DYING;
|
||||||
|
}
|
||||||
|
|
||||||
|
-void nf_flow_offload_work_gc(struct work_struct *work)
|
||||||
|
+static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
|
||||||
|
{
|
||||||
|
struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
- struct nf_flowtable *flow_table;
|
||||||
|
struct rhashtable_iter hti;
|
||||||
|
struct flow_offload *flow;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
- flow_table = container_of(work, struct nf_flowtable, gc_work.work);
|
||||||
|
-
|
||||||
|
err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
|
||||||
|
if (err)
|
||||||
|
- goto schedule;
|
||||||
|
+ return 0;
|
||||||
|
|
||||||
|
rhashtable_walk_start(&hti);
|
||||||
|
|
||||||
|
@@ -270,7 +267,16 @@ void nf_flow_offload_work_gc(struct work
|
||||||
|
out:
|
||||||
|
rhashtable_walk_stop(&hti);
|
||||||
|
rhashtable_walk_exit(&hti);
|
||||||
|
-schedule:
|
||||||
|
+
|
||||||
|
+ return 1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void nf_flow_offload_work_gc(struct work_struct *work)
|
||||||
|
+{
|
||||||
|
+ struct nf_flowtable *flow_table;
|
||||||
|
+
|
||||||
|
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
|
||||||
|
+ nf_flow_offload_gc_step(flow_table);
|
||||||
|
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
|
||||||
|
@@ -449,5 +455,12 @@ void nf_flow_table_cleanup(struct net *n
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
|
||||||
|
|
||||||
|
+void nf_flow_table_free(struct nf_flowtable *flow_table)
|
||||||
|
+{
|
||||||
|
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
|
||||||
|
+ WARN_ON(!nf_flow_offload_gc_step(flow_table));
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_table_free);
|
||||||
|
+
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
||||||
|
--- a/net/netfilter/nf_flow_table_inet.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_inet.c
|
||||||
|
@@ -24,6 +24,7 @@ static struct nf_flowtable_type flowtabl
|
||||||
|
.family = NFPROTO_INET,
|
||||||
|
.params = &nf_flow_offload_rhash_params,
|
||||||
|
.gc = nf_flow_offload_work_gc,
|
||||||
|
+ .free = nf_flow_table_free,
|
||||||
|
.hook = nf_flow_offload_inet_hook,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -5271,17 +5271,12 @@ err:
|
||||||
|
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void nft_flowtable_destroy(void *ptr, void *arg)
|
||||||
|
-{
|
||||||
|
- kfree(ptr);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
|
||||||
|
{
|
||||||
|
cancel_delayed_work_sync(&flowtable->data.gc_work);
|
||||||
|
kfree(flowtable->name);
|
||||||
|
- rhashtable_free_and_destroy(&flowtable->data.rhashtable,
|
||||||
|
- nft_flowtable_destroy, NULL);
|
||||||
|
+ flowtable->data.type->free(&flowtable->data);
|
||||||
|
+ rhashtable_destroy(&flowtable->data.rhashtable);
|
||||||
|
module_put(flowtable->data.type->owner);
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,96 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Thu, 25 Jan 2018 12:58:55 +0100
|
||||||
|
Subject: [PATCH] netfilter: nft_flow_offload: handle netdevice events from
|
||||||
|
nf_flow_table
|
||||||
|
|
||||||
|
Move the code that deals with device events to the core.
|
||||||
|
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table.c
|
||||||
|
@@ -462,5 +462,35 @@ void nf_flow_table_free(struct nf_flowta
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_flow_table_free);
|
||||||
|
|
||||||
|
+static int nf_flow_table_netdev_event(struct notifier_block *this,
|
||||||
|
+ unsigned long event, void *ptr)
|
||||||
|
+{
|
||||||
|
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||||
|
+
|
||||||
|
+ if (event != NETDEV_DOWN)
|
||||||
|
+ return NOTIFY_DONE;
|
||||||
|
+
|
||||||
|
+ nf_flow_table_cleanup(dev_net(dev), dev);
|
||||||
|
+
|
||||||
|
+ return NOTIFY_DONE;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct notifier_block flow_offload_netdev_notifier = {
|
||||||
|
+ .notifier_call = nf_flow_table_netdev_event,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int __init nf_flow_table_module_init(void)
|
||||||
|
+{
|
||||||
|
+ return register_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __exit nf_flow_table_module_exit(void)
|
||||||
|
+{
|
||||||
|
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+module_init(nf_flow_table_module_init);
|
||||||
|
+module_exit(nf_flow_table_module_exit);
|
||||||
|
+
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
||||||
|
--- a/net/netfilter/nft_flow_offload.c
|
||||||
|
+++ b/net/netfilter/nft_flow_offload.c
|
||||||
|
@@ -194,44 +194,14 @@ static struct nft_expr_type nft_flow_off
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int flow_offload_netdev_event(struct notifier_block *this,
|
||||||
|
- unsigned long event, void *ptr)
|
||||||
|
-{
|
||||||
|
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||||
|
-
|
||||||
|
- if (event != NETDEV_DOWN)
|
||||||
|
- return NOTIFY_DONE;
|
||||||
|
-
|
||||||
|
- nf_flow_table_cleanup(dev_net(dev), dev);
|
||||||
|
-
|
||||||
|
- return NOTIFY_DONE;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static struct notifier_block flow_offload_netdev_notifier = {
|
||||||
|
- .notifier_call = flow_offload_netdev_event,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
static int __init nft_flow_offload_module_init(void)
|
||||||
|
{
|
||||||
|
- int err;
|
||||||
|
-
|
||||||
|
- register_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
-
|
||||||
|
- err = nft_register_expr(&nft_flow_offload_type);
|
||||||
|
- if (err < 0)
|
||||||
|
- goto register_expr;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-
|
||||||
|
-register_expr:
|
||||||
|
- unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
- return err;
|
||||||
|
+ return nft_register_expr(&nft_flow_offload_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit nft_flow_offload_module_exit(void)
|
||||||
|
{
|
||||||
|
nft_unregister_expr(&nft_flow_offload_type);
|
||||||
|
- unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(nft_flow_offload_module_init);
|
|
@ -0,0 +1,468 @@
|
||||||
|
From: Harsha Sharma <harshasharmaiitr@gmail.com>
|
||||||
|
Date: Wed, 27 Dec 2017 00:59:00 +0530
|
||||||
|
Subject: [PATCH] netfilter: nf_tables: allocate handle and delete objects via
|
||||||
|
handle
|
||||||
|
|
||||||
|
This patch allows deletion of objects via unique handle which can be
|
||||||
|
listed via '-a' option.
|
||||||
|
|
||||||
|
Signed-off-by: Harsha Sharma <harshasharmaiitr@gmail.com>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_tables.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables.h
|
||||||
|
@@ -370,6 +370,7 @@ void nft_unregister_set(struct nft_set_t
|
||||||
|
* @list: table set list node
|
||||||
|
* @bindings: list of set bindings
|
||||||
|
* @name: name of the set
|
||||||
|
+ * @handle: unique handle of the set
|
||||||
|
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
|
||||||
|
* @dtype: data type (verdict or numeric type defined by userspace)
|
||||||
|
* @objtype: object type (see NFT_OBJECT_* definitions)
|
||||||
|
@@ -392,6 +393,7 @@ struct nft_set {
|
||||||
|
struct list_head list;
|
||||||
|
struct list_head bindings;
|
||||||
|
char *name;
|
||||||
|
+ u64 handle;
|
||||||
|
u32 ktype;
|
||||||
|
u32 dtype;
|
||||||
|
u32 objtype;
|
||||||
|
@@ -941,6 +943,7 @@ unsigned int nft_do_chain(struct nft_pkt
|
||||||
|
* @objects: stateful objects in the table
|
||||||
|
* @flowtables: flow tables in the table
|
||||||
|
* @hgenerator: handle generator state
|
||||||
|
+ * @handle: table handle
|
||||||
|
* @use: number of chain references to this table
|
||||||
|
* @flags: table flag (see enum nft_table_flags)
|
||||||
|
* @genmask: generation mask
|
||||||
|
@@ -954,6 +957,7 @@ struct nft_table {
|
||||||
|
struct list_head objects;
|
||||||
|
struct list_head flowtables;
|
||||||
|
u64 hgenerator;
|
||||||
|
+ u64 handle;
|
||||||
|
u32 use;
|
||||||
|
u16 family:6,
|
||||||
|
flags:8,
|
||||||
|
@@ -978,9 +982,9 @@ int nft_verdict_dump(struct sk_buff *skb
|
||||||
|
* @name: name of this stateful object
|
||||||
|
* @genmask: generation mask
|
||||||
|
* @use: number of references to this stateful object
|
||||||
|
- * @data: object data, layout depends on type
|
||||||
|
+ * @handle: unique object handle
|
||||||
|
* @ops: object operations
|
||||||
|
- * @data: pointer to object data
|
||||||
|
+ * @data: object data, layout depends on type
|
||||||
|
*/
|
||||||
|
struct nft_object {
|
||||||
|
struct list_head list;
|
||||||
|
@@ -988,6 +992,7 @@ struct nft_object {
|
||||||
|
struct nft_table *table;
|
||||||
|
u32 genmask:2,
|
||||||
|
use:30;
|
||||||
|
+ u64 handle;
|
||||||
|
/* runtime data below here */
|
||||||
|
const struct nft_object_ops *ops ____cacheline_aligned;
|
||||||
|
unsigned char data[]
|
||||||
|
@@ -1069,6 +1074,7 @@ void nft_unregister_obj(struct nft_objec
|
||||||
|
* @ops_len: number of hooks in array
|
||||||
|
* @genmask: generation mask
|
||||||
|
* @use: number of references to this flow table
|
||||||
|
+ * @handle: unique object handle
|
||||||
|
* @data: rhashtable and garbage collector
|
||||||
|
* @ops: array of hooks
|
||||||
|
*/
|
||||||
|
@@ -1081,6 +1087,7 @@ struct nft_flowtable {
|
||||||
|
int ops_len;
|
||||||
|
u32 genmask:2,
|
||||||
|
use:30;
|
||||||
|
+ u64 handle;
|
||||||
|
/* runtime data below here */
|
||||||
|
struct nf_hook_ops *ops ____cacheline_aligned;
|
||||||
|
struct nf_flowtable data;
|
||||||
|
--- a/include/uapi/linux/netfilter/nf_tables.h
|
||||||
|
+++ b/include/uapi/linux/netfilter/nf_tables.h
|
||||||
|
@@ -174,6 +174,8 @@ enum nft_table_attributes {
|
||||||
|
NFTA_TABLE_NAME,
|
||||||
|
NFTA_TABLE_FLAGS,
|
||||||
|
NFTA_TABLE_USE,
|
||||||
|
+ NFTA_TABLE_HANDLE,
|
||||||
|
+ NFTA_TABLE_PAD,
|
||||||
|
__NFTA_TABLE_MAX
|
||||||
|
};
|
||||||
|
#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
|
||||||
|
@@ -317,6 +319,7 @@ enum nft_set_desc_attributes {
|
||||||
|
* @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
|
||||||
|
* @NFTA_SET_USERDATA: user data (NLA_BINARY)
|
||||||
|
* @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
|
||||||
|
+ * @NFTA_SET_HANDLE: set handle (NLA_U64)
|
||||||
|
*/
|
||||||
|
enum nft_set_attributes {
|
||||||
|
NFTA_SET_UNSPEC,
|
||||||
|
@@ -335,6 +338,7 @@ enum nft_set_attributes {
|
||||||
|
NFTA_SET_USERDATA,
|
||||||
|
NFTA_SET_PAD,
|
||||||
|
NFTA_SET_OBJ_TYPE,
|
||||||
|
+ NFTA_SET_HANDLE,
|
||||||
|
__NFTA_SET_MAX
|
||||||
|
};
|
||||||
|
#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
|
||||||
|
@@ -1314,6 +1318,7 @@ enum nft_ct_helper_attributes {
|
||||||
|
* @NFTA_OBJ_TYPE: stateful object type (NLA_U32)
|
||||||
|
* @NFTA_OBJ_DATA: stateful object data (NLA_NESTED)
|
||||||
|
* @NFTA_OBJ_USE: number of references to this expression (NLA_U32)
|
||||||
|
+ * @NFTA_OBJ_HANDLE: object handle (NLA_U64)
|
||||||
|
*/
|
||||||
|
enum nft_object_attributes {
|
||||||
|
NFTA_OBJ_UNSPEC,
|
||||||
|
@@ -1322,6 +1327,8 @@ enum nft_object_attributes {
|
||||||
|
NFTA_OBJ_TYPE,
|
||||||
|
NFTA_OBJ_DATA,
|
||||||
|
NFTA_OBJ_USE,
|
||||||
|
+ NFTA_OBJ_HANDLE,
|
||||||
|
+ NFTA_OBJ_PAD,
|
||||||
|
__NFTA_OBJ_MAX
|
||||||
|
};
|
||||||
|
#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1)
|
||||||
|
@@ -1333,6 +1340,7 @@ enum nft_object_attributes {
|
||||||
|
* @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING)
|
||||||
|
* @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
|
||||||
|
* @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
|
||||||
|
+ * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
|
||||||
|
*/
|
||||||
|
enum nft_flowtable_attributes {
|
||||||
|
NFTA_FLOWTABLE_UNSPEC,
|
||||||
|
@@ -1340,6 +1348,8 @@ enum nft_flowtable_attributes {
|
||||||
|
NFTA_FLOWTABLE_NAME,
|
||||||
|
NFTA_FLOWTABLE_HOOK,
|
||||||
|
NFTA_FLOWTABLE_USE,
|
||||||
|
+ NFTA_FLOWTABLE_HANDLE,
|
||||||
|
+ NFTA_FLOWTABLE_PAD,
|
||||||
|
__NFTA_FLOWTABLE_MAX
|
||||||
|
};
|
||||||
|
#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1)
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -26,6 +26,7 @@
|
||||||
|
static LIST_HEAD(nf_tables_expressions);
|
||||||
|
static LIST_HEAD(nf_tables_objects);
|
||||||
|
static LIST_HEAD(nf_tables_flowtables);
|
||||||
|
+static u64 table_handle;
|
||||||
|
|
||||||
|
static void nft_ctx_init(struct nft_ctx *ctx,
|
||||||
|
struct net *net,
|
||||||
|
@@ -361,6 +362,20 @@ static struct nft_table *nft_table_looku
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
|
||||||
|
+ const struct nlattr *nla,
|
||||||
|
+ u8 genmask)
|
||||||
|
+{
|
||||||
|
+ struct nft_table *table;
|
||||||
|
+
|
||||||
|
+ list_for_each_entry(table, &net->nft.tables, list) {
|
||||||
|
+ if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
|
||||||
|
+ nft_active_genmask(table, genmask))
|
||||||
|
+ return table;
|
||||||
|
+ }
|
||||||
|
+ return NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static struct nft_table *nf_tables_table_lookup(const struct net *net,
|
||||||
|
const struct nlattr *nla,
|
||||||
|
u8 family, u8 genmask)
|
||||||
|
@@ -377,6 +392,22 @@ static struct nft_table *nf_tables_table
|
||||||
|
return ERR_PTR(-ENOENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net,
|
||||||
|
+ const struct nlattr *nla,
|
||||||
|
+ u8 genmask)
|
||||||
|
+{
|
||||||
|
+ struct nft_table *table;
|
||||||
|
+
|
||||||
|
+ if (nla == NULL)
|
||||||
|
+ return ERR_PTR(-EINVAL);
|
||||||
|
+
|
||||||
|
+ table = nft_table_lookup_byhandle(net, nla, genmask);
|
||||||
|
+ if (table != NULL)
|
||||||
|
+ return table;
|
||||||
|
+
|
||||||
|
+ return ERR_PTR(-ENOENT);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static inline u64 nf_tables_alloc_handle(struct nft_table *table)
|
||||||
|
{
|
||||||
|
return ++table->hgenerator;
|
||||||
|
@@ -423,6 +454,7 @@ static const struct nla_policy nft_table
|
||||||
|
[NFTA_TABLE_NAME] = { .type = NLA_STRING,
|
||||||
|
.len = NFT_TABLE_MAXNAMELEN - 1 },
|
||||||
|
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
|
||||||
|
+ [NFTA_TABLE_HANDLE] = { .type = NLA_U64 },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
|
||||||
|
@@ -444,7 +476,9 @@ static int nf_tables_fill_table_info(str
|
||||||
|
|
||||||
|
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
|
||||||
|
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
|
||||||
|
- nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
|
||||||
|
+ nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
|
||||||
|
+ nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
|
||||||
|
+ NFTA_TABLE_PAD))
|
||||||
|
goto nla_put_failure;
|
||||||
|
|
||||||
|
nlmsg_end(skb, nlh);
|
||||||
|
@@ -703,6 +737,7 @@ static int nf_tables_newtable(struct net
|
||||||
|
INIT_LIST_HEAD(&table->flowtables);
|
||||||
|
table->family = family;
|
||||||
|
table->flags = flags;
|
||||||
|
+ table->handle = ++table_handle;
|
||||||
|
|
||||||
|
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
|
||||||
|
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
|
||||||
|
@@ -820,11 +855,18 @@ static int nf_tables_deltable(struct net
|
||||||
|
struct nft_ctx ctx;
|
||||||
|
|
||||||
|
nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
|
||||||
|
- if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
|
||||||
|
+ if (family == AF_UNSPEC ||
|
||||||
|
+ (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
|
||||||
|
return nft_flush(&ctx, family);
|
||||||
|
|
||||||
|
- table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
|
||||||
|
- genmask);
|
||||||
|
+ if (nla[NFTA_TABLE_HANDLE])
|
||||||
|
+ table = nf_tables_table_lookup_byhandle(net,
|
||||||
|
+ nla[NFTA_TABLE_HANDLE],
|
||||||
|
+ genmask);
|
||||||
|
+ else
|
||||||
|
+ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME],
|
||||||
|
+ family, genmask);
|
||||||
|
+
|
||||||
|
if (IS_ERR(table))
|
||||||
|
return PTR_ERR(table);
|
||||||
|
|
||||||
|
@@ -1581,6 +1623,7 @@ static int nf_tables_delchain(struct net
|
||||||
|
struct nft_rule *rule;
|
||||||
|
int family = nfmsg->nfgen_family;
|
||||||
|
struct nft_ctx ctx;
|
||||||
|
+ u64 handle;
|
||||||
|
u32 use;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
@@ -1589,7 +1632,12 @@ static int nf_tables_delchain(struct net
|
||||||
|
if (IS_ERR(table))
|
||||||
|
return PTR_ERR(table);
|
||||||
|
|
||||||
|
- chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
|
||||||
|
+ if (nla[NFTA_CHAIN_HANDLE]) {
|
||||||
|
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
|
||||||
|
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
|
||||||
|
+ } else {
|
||||||
|
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
|
||||||
|
+ }
|
||||||
|
if (IS_ERR(chain))
|
||||||
|
return PTR_ERR(chain);
|
||||||
|
|
||||||
|
@@ -2563,6 +2611,7 @@ static const struct nla_policy nft_set_p
|
||||||
|
[NFTA_SET_USERDATA] = { .type = NLA_BINARY,
|
||||||
|
.len = NFT_USERDATA_MAXLEN },
|
||||||
|
[NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
|
||||||
|
+ [NFTA_SET_HANDLE] = { .type = NLA_U64 },
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
|
||||||
|
@@ -2606,6 +2655,22 @@ static struct nft_set *nf_tables_set_loo
|
||||||
|
return ERR_PTR(-ENOENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table,
|
||||||
|
+ const struct nlattr *nla, u8 genmask)
|
||||||
|
+{
|
||||||
|
+ struct nft_set *set;
|
||||||
|
+
|
||||||
|
+ if (nla == NULL)
|
||||||
|
+ return ERR_PTR(-EINVAL);
|
||||||
|
+
|
||||||
|
+ list_for_each_entry(set, &table->sets, list) {
|
||||||
|
+ if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
|
||||||
|
+ nft_active_genmask(set, genmask))
|
||||||
|
+ return set;
|
||||||
|
+ }
|
||||||
|
+ return ERR_PTR(-ENOENT);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
|
||||||
|
const struct nlattr *nla,
|
||||||
|
u8 genmask)
|
||||||
|
@@ -2722,6 +2787,9 @@ static int nf_tables_fill_set(struct sk_
|
||||||
|
goto nla_put_failure;
|
||||||
|
if (nla_put_string(skb, NFTA_SET_NAME, set->name))
|
||||||
|
goto nla_put_failure;
|
||||||
|
+ if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle),
|
||||||
|
+ NFTA_SET_PAD))
|
||||||
|
+ goto nla_put_failure;
|
||||||
|
if (set->flags != 0)
|
||||||
|
if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
|
||||||
|
goto nla_put_failure;
|
||||||
|
@@ -3130,6 +3198,7 @@ static int nf_tables_newset(struct net *
|
||||||
|
set->udata = udata;
|
||||||
|
set->timeout = timeout;
|
||||||
|
set->gc_int = gc_int;
|
||||||
|
+ set->handle = nf_tables_alloc_handle(table);
|
||||||
|
|
||||||
|
err = ops->init(set, &desc, nla);
|
||||||
|
if (err < 0)
|
||||||
|
@@ -3189,7 +3258,10 @@ static int nf_tables_delset(struct net *
|
||||||
|
if (err < 0)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
|
||||||
|
+ if (nla[NFTA_SET_HANDLE])
|
||||||
|
+ set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask);
|
||||||
|
+ else
|
||||||
|
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
|
||||||
|
if (IS_ERR(set))
|
||||||
|
return PTR_ERR(set);
|
||||||
|
|
||||||
|
@@ -4250,6 +4322,21 @@ struct nft_object *nf_tables_obj_lookup(
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
|
||||||
|
|
||||||
|
+struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
|
||||||
|
+ const struct nlattr *nla,
|
||||||
|
+ u32 objtype, u8 genmask)
|
||||||
|
+{
|
||||||
|
+ struct nft_object *obj;
|
||||||
|
+
|
||||||
|
+ list_for_each_entry(obj, &table->objects, list) {
|
||||||
|
+ if (be64_to_cpu(nla_get_be64(nla)) == obj->handle &&
|
||||||
|
+ objtype == obj->ops->type->type &&
|
||||||
|
+ nft_active_genmask(obj, genmask))
|
||||||
|
+ return obj;
|
||||||
|
+ }
|
||||||
|
+ return ERR_PTR(-ENOENT);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
|
||||||
|
[NFTA_OBJ_TABLE] = { .type = NLA_STRING,
|
||||||
|
.len = NFT_TABLE_MAXNAMELEN - 1 },
|
||||||
|
@@ -4257,6 +4344,7 @@ static const struct nla_policy nft_obj_p
|
||||||
|
.len = NFT_OBJ_MAXNAMELEN - 1 },
|
||||||
|
[NFTA_OBJ_TYPE] = { .type = NLA_U32 },
|
||||||
|
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
|
||||||
|
+ [NFTA_OBJ_HANDLE] = { .type = NLA_U64},
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
|
||||||
|
@@ -4404,6 +4492,8 @@ static int nf_tables_newobj(struct net *
|
||||||
|
goto err1;
|
||||||
|
}
|
||||||
|
obj->table = table;
|
||||||
|
+ obj->handle = nf_tables_alloc_handle(table);
|
||||||
|
+
|
||||||
|
obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
|
||||||
|
if (!obj->name) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
@@ -4450,7 +4540,9 @@ static int nf_tables_fill_obj_info(struc
|
||||||
|
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
|
||||||
|
nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
|
||||||
|
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
|
||||||
|
- nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
|
||||||
|
+ nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
|
||||||
|
+ nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
|
||||||
|
+ NFTA_OBJ_PAD))
|
||||||
|
goto nla_put_failure;
|
||||||
|
|
||||||
|
nlmsg_end(skb, nlh);
|
||||||
|
@@ -4648,7 +4740,7 @@ static int nf_tables_delobj(struct net *
|
||||||
|
u32 objtype;
|
||||||
|
|
||||||
|
if (!nla[NFTA_OBJ_TYPE] ||
|
||||||
|
- !nla[NFTA_OBJ_NAME])
|
||||||
|
+ (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
|
||||||
|
@@ -4657,7 +4749,12 @@ static int nf_tables_delobj(struct net *
|
||||||
|
return PTR_ERR(table);
|
||||||
|
|
||||||
|
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
|
||||||
|
- obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
|
||||||
|
+ if (nla[NFTA_OBJ_HANDLE])
|
||||||
|
+ obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
|
||||||
|
+ objtype, genmask);
|
||||||
|
+ else
|
||||||
|
+ obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME],
|
||||||
|
+ objtype, genmask);
|
||||||
|
if (IS_ERR(obj))
|
||||||
|
return PTR_ERR(obj);
|
||||||
|
if (obj->use > 0)
|
||||||
|
@@ -4729,6 +4826,7 @@ static const struct nla_policy nft_flowt
|
||||||
|
[NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING,
|
||||||
|
.len = NFT_NAME_MAXLEN - 1 },
|
||||||
|
[NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
|
||||||
|
+ [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
|
||||||
|
};
|
||||||
|
|
||||||
|
struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
|
||||||
|
@@ -4746,6 +4844,20 @@ struct nft_flowtable *nf_tables_flowtabl
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
|
||||||
|
|
||||||
|
+struct nft_flowtable *
|
||||||
|
+nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
|
||||||
|
+ const struct nlattr *nla, u8 genmask)
|
||||||
|
+{
|
||||||
|
+ struct nft_flowtable *flowtable;
|
||||||
|
+
|
||||||
|
+ list_for_each_entry(flowtable, &table->flowtables, list) {
|
||||||
|
+ if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle &&
|
||||||
|
+ nft_active_genmask(flowtable, genmask))
|
||||||
|
+ return flowtable;
|
||||||
|
+ }
|
||||||
|
+ return ERR_PTR(-ENOENT);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#define NFT_FLOWTABLE_DEVICE_MAX 8
|
||||||
|
|
||||||
|
static int nf_tables_parse_devices(const struct nft_ctx *ctx,
|
||||||
|
@@ -4954,6 +5066,8 @@ static int nf_tables_newflowtable(struct
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
flowtable->table = table;
|
||||||
|
+ flowtable->handle = nf_tables_alloc_handle(table);
|
||||||
|
+
|
||||||
|
flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
|
||||||
|
if (!flowtable->name) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
@@ -5028,8 +5142,14 @@ static int nf_tables_delflowtable(struct
|
||||||
|
if (IS_ERR(table))
|
||||||
|
return PTR_ERR(table);
|
||||||
|
|
||||||
|
- flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
|
||||||
|
- genmask);
|
||||||
|
+ if (nla[NFTA_FLOWTABLE_HANDLE])
|
||||||
|
+ flowtable = nf_tables_flowtable_lookup_byhandle(table,
|
||||||
|
+ nla[NFTA_FLOWTABLE_HANDLE],
|
||||||
|
+ genmask);
|
||||||
|
+ else
|
||||||
|
+ flowtable = nf_tables_flowtable_lookup(table,
|
||||||
|
+ nla[NFTA_FLOWTABLE_NAME],
|
||||||
|
+ genmask);
|
||||||
|
if (IS_ERR(flowtable))
|
||||||
|
return PTR_ERR(flowtable);
|
||||||
|
if (flowtable->use > 0)
|
||||||
|
@@ -5062,7 +5182,9 @@ static int nf_tables_fill_flowtable_info
|
||||||
|
|
||||||
|
if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
|
||||||
|
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
|
||||||
|
- nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
|
||||||
|
+ nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
|
||||||
|
+ nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
|
||||||
|
+ NFTA_FLOWTABLE_PAD))
|
||||||
|
goto nla_put_failure;
|
||||||
|
|
||||||
|
nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
|
|
@ -0,0 +1,95 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Wed, 7 Feb 2018 09:23:25 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_offload: fix use-after-free and a resource
|
||||||
|
leak
|
||||||
|
|
||||||
|
flow_offload_del frees the flow, so all associated resources must be
|
||||||
|
freed before.
|
||||||
|
|
||||||
|
Since the ct entry in struct flow_offload_entry was allocated by
|
||||||
|
flow_offload_alloc, it should be freed by flow_offload_free to take care
|
||||||
|
of the error handling path when flow_offload_add fails.
|
||||||
|
|
||||||
|
While at it, make flow_offload_del static, since it should never be
|
||||||
|
called directly, only from the gc step.
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -90,7 +90,6 @@ struct flow_offload *flow_offload_alloc(
|
||||||
|
void flow_offload_free(struct flow_offload *flow);
|
||||||
|
|
||||||
|
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
|
||||||
|
-void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow);
|
||||||
|
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||||
|
struct flow_offload_tuple *tuple);
|
||||||
|
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||||
|
--- a/net/netfilter/nf_flow_table.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table.c
|
||||||
|
@@ -125,7 +125,9 @@ void flow_offload_free(struct flow_offlo
|
||||||
|
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
|
||||||
|
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
|
||||||
|
e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
- kfree(e);
|
||||||
|
+ nf_ct_delete(e->ct, 0, 0);
|
||||||
|
+ nf_ct_put(e->ct);
|
||||||
|
+ kfree_rcu(e, rcu_head);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_free);
|
||||||
|
|
||||||
|
@@ -149,11 +151,9 @@ int flow_offload_add(struct nf_flowtable
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_add);
|
||||||
|
|
||||||
|
-void flow_offload_del(struct nf_flowtable *flow_table,
|
||||||
|
- struct flow_offload *flow)
|
||||||
|
+static void flow_offload_del(struct nf_flowtable *flow_table,
|
||||||
|
+ struct flow_offload *flow)
|
||||||
|
{
|
||||||
|
- struct flow_offload_entry *e;
|
||||||
|
-
|
||||||
|
rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
*flow_table->type->params);
|
||||||
|
@@ -161,10 +161,8 @@ void flow_offload_del(struct nf_flowtabl
|
||||||
|
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
*flow_table->type->params);
|
||||||
|
|
||||||
|
- e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
- kfree_rcu(e, rcu_head);
|
||||||
|
+ flow_offload_free(flow);
|
||||||
|
}
|
||||||
|
-EXPORT_SYMBOL_GPL(flow_offload_del);
|
||||||
|
|
||||||
|
struct flow_offload_tuple_rhash *
|
||||||
|
flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||||
|
@@ -175,15 +173,6 @@ flow_offload_lookup(struct nf_flowtable
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_lookup);
|
||||||
|
|
||||||
|
-static void nf_flow_release_ct(const struct flow_offload *flow)
|
||||||
|
-{
|
||||||
|
- struct flow_offload_entry *e;
|
||||||
|
-
|
||||||
|
- e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
- nf_ct_delete(e->ct, 0, 0);
|
||||||
|
- nf_ct_put(e->ct);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||||
|
void (*iter)(struct flow_offload *flow, void *data),
|
||||||
|
void *data)
|
||||||
|
@@ -259,10 +248,8 @@ static int nf_flow_offload_gc_step(struc
|
||||||
|
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
|
||||||
|
|
||||||
|
if (nf_flow_has_expired(flow) ||
|
||||||
|
- nf_flow_is_dying(flow)) {
|
||||||
|
+ nf_flow_is_dying(flow))
|
||||||
|
flow_offload_del(flow_table, flow);
|
||||||
|
- nf_flow_release_ct(flow);
|
||||||
|
- }
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
rhashtable_walk_stop(&hti);
|
|
@ -0,0 +1,73 @@
|
||||||
|
From: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
Date: Wed, 31 Jan 2018 18:13:39 +0100
|
||||||
|
Subject: [PATCH] netfilter: flowtable infrastructure depends on
|
||||||
|
NETFILTER_INGRESS
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE depends on NETFILTER_INGRESS. If users forget to
|
||||||
|
enable this toggle, flowtable registration fails with EOPNOTSUPP.
|
||||||
|
|
||||||
|
Moreover, turn 'select NF_FLOW_TABLE' in every flowtable family flavour
|
||||||
|
into a dependency instead, otherwise this new dependency on
|
||||||
|
NETFILTER_INGRESS causes a warning. This also allows us to remove the
|
||||||
|
explicit dependency between family flowtables <-> NF_TABLES and
|
||||||
|
NF_CONNTRACK, given they depend on the NF_FLOW_TABLE core that already
|
||||||
|
expresses the general dependencies for this new infrastructure.
|
||||||
|
|
||||||
|
Moreover, NF_FLOW_TABLE_INET depends on NF_FLOW_TABLE_IPV4 and
|
||||||
|
NF_FLOW_TABLE_IPV6, which already depend on NF_FLOW_TABLE. So we can get
|
||||||
|
rid of the direct dependency on NF_FLOW_TABLE.
|
||||||
|
|
||||||
|
In general, let's avoid 'select', it just makes things more complicated.
|
||||||
|
|
||||||
|
Reported-by: John Crispin <john@phrozen.org>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/Kconfig
|
||||||
|
+++ b/net/ipv4/netfilter/Kconfig
|
||||||
|
@@ -80,8 +80,7 @@ endif # NF_TABLES
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE_IPV4
|
||||||
|
tristate "Netfilter flow table IPv4 module"
|
||||||
|
- depends on NF_CONNTRACK && NF_TABLES
|
||||||
|
- select NF_FLOW_TABLE
|
||||||
|
+ depends on NF_FLOW_TABLE
|
||||||
|
help
|
||||||
|
This option adds the flow table IPv4 support.
|
||||||
|
|
||||||
|
--- a/net/ipv6/netfilter/Kconfig
|
||||||
|
+++ b/net/ipv6/netfilter/Kconfig
|
||||||
|
@@ -101,8 +101,7 @@ endif # NF_TABLES
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE_IPV6
|
||||||
|
tristate "Netfilter flow table IPv6 module"
|
||||||
|
- depends on NF_CONNTRACK && NF_TABLES
|
||||||
|
- select NF_FLOW_TABLE
|
||||||
|
+ depends on NF_FLOW_TABLE
|
||||||
|
help
|
||||||
|
This option adds the flow table IPv6 support.
|
||||||
|
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -676,8 +676,8 @@ endif # NF_TABLES
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE_INET
|
||||||
|
tristate "Netfilter flow table mixed IPv4/IPv6 module"
|
||||||
|
- depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
|
||||||
|
- select NF_FLOW_TABLE
|
||||||
|
+ depends on NF_FLOW_TABLE_IPV4
|
||||||
|
+ depends on NF_FLOW_TABLE_IPV6
|
||||||
|
help
|
||||||
|
This option adds the flow table mixed IPv4/IPv6 support.
|
||||||
|
|
||||||
|
@@ -685,7 +685,9 @@ config NF_FLOW_TABLE_INET
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE
|
||||||
|
tristate "Netfilter flow table module"
|
||||||
|
- depends on NF_CONNTRACK && NF_TABLES
|
||||||
|
+ depends on NETFILTER_INGRESS
|
||||||
|
+ depends on NF_CONNTRACK
|
||||||
|
+ depends on NF_TABLES
|
||||||
|
help
|
||||||
|
This option adds the flow table core infrastructure.
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
From: Wei Yongjun <weiyongjun1@huawei.com>
|
||||||
|
Date: Wed, 10 Jan 2018 13:06:46 +0000
|
||||||
|
Subject: [PATCH] netfilter: remove duplicated include
|
||||||
|
|
||||||
|
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
|
||||||
|
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
@@ -5,7 +5,6 @@
|
||||||
|
#include <linux/rhashtable.h>
|
||||||
|
#include <linux/ipv6.h>
|
||||||
|
#include <linux/netdevice.h>
|
||||||
|
-#include <linux/ipv6.h>
|
||||||
|
#include <net/ipv6.h>
|
||||||
|
#include <net/ip6_route.h>
|
||||||
|
#include <net/neighbour.h>
|
||||||
|
--- a/net/netfilter/nf_queue.c
|
||||||
|
+++ b/net/netfilter/nf_queue.c
|
||||||
|
@@ -15,8 +15,6 @@
|
||||||
|
#include <linux/netfilter_bridge.h>
|
||||||
|
#include <linux/seq_file.h>
|
||||||
|
#include <linux/rcupdate.h>
|
||||||
|
-#include <linux/netfilter_ipv4.h>
|
||||||
|
-#include <linux/netfilter_ipv6.h>
|
||||||
|
#include <net/protocol.h>
|
||||||
|
#include <net/netfilter/nf_queue.h>
|
||||||
|
#include <net/dst.h>
|
|
@ -0,0 +1,35 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Fri, 16 Feb 2018 09:41:18 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: use IP_CT_DIR_* values for
|
||||||
|
FLOW_OFFLOAD_DIR_*
|
||||||
|
|
||||||
|
Simplifies further code cleanups
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -6,6 +6,7 @@
|
||||||
|
#include <linux/netdevice.h>
|
||||||
|
#include <linux/rhashtable.h>
|
||||||
|
#include <linux/rcupdate.h>
|
||||||
|
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
|
||||||
|
#include <net/dst.h>
|
||||||
|
|
||||||
|
struct nf_flowtable;
|
||||||
|
@@ -27,11 +28,10 @@ struct nf_flowtable {
|
||||||
|
};
|
||||||
|
|
||||||
|
enum flow_offload_tuple_dir {
|
||||||
|
- FLOW_OFFLOAD_DIR_ORIGINAL,
|
||||||
|
- FLOW_OFFLOAD_DIR_REPLY,
|
||||||
|
- __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY,
|
||||||
|
+ FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
|
||||||
|
+ FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
|
||||||
|
+ FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
|
||||||
|
};
|
||||||
|
-#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1)
|
||||||
|
|
||||||
|
struct flow_offload_tuple {
|
||||||
|
union {
|
|
@ -0,0 +1,118 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Fri, 16 Feb 2018 09:42:32 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: clean up flow_offload_alloc
|
||||||
|
|
||||||
|
Reduce code duplication and make it much easier to read
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table.c
|
||||||
|
@@ -16,6 +16,38 @@ struct flow_offload_entry {
|
||||||
|
struct rcu_head rcu_head;
|
||||||
|
};
|
||||||
|
|
||||||
|
+static void
|
||||||
|
+flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
|
||||||
|
+ struct nf_flow_route *route,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
|
||||||
|
+ struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
|
||||||
|
+
|
||||||
|
+ ft->dir = dir;
|
||||||
|
+
|
||||||
|
+ switch (ctt->src.l3num) {
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ ft->src_v4 = ctt->src.u3.in;
|
||||||
|
+ ft->dst_v4 = ctt->dst.u3.in;
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ ft->src_v6 = ctt->src.u3.in6;
|
||||||
|
+ ft->dst_v6 = ctt->dst.u3.in6;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ft->l3proto = ctt->src.l3num;
|
||||||
|
+ ft->l4proto = ctt->dst.protonum;
|
||||||
|
+ ft->src_port = ctt->src.u.tcp.port;
|
||||||
|
+ ft->dst_port = ctt->dst.u.tcp.port;
|
||||||
|
+
|
||||||
|
+ ft->iifidx = route->tuple[dir].ifindex;
|
||||||
|
+ ft->oifidx = route->tuple[!dir].ifindex;
|
||||||
|
+
|
||||||
|
+ ft->dst_cache = route->tuple[dir].dst;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
struct flow_offload *
|
||||||
|
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
|
||||||
|
{
|
||||||
|
@@ -40,65 +72,8 @@ flow_offload_alloc(struct nf_conn *ct, s
|
||||||
|
|
||||||
|
entry->ct = ct;
|
||||||
|
|
||||||
|
- switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
|
||||||
|
- case NFPROTO_IPV4:
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
|
||||||
|
- break;
|
||||||
|
- case NFPROTO_IPV6:
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
|
||||||
|
-
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
|
||||||
|
- route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
|
||||||
|
- route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
|
||||||
|
-
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
|
||||||
|
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
|
||||||
|
-
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
|
||||||
|
- FLOW_OFFLOAD_DIR_ORIGINAL;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
|
||||||
|
- FLOW_OFFLOAD_DIR_REPLY;
|
||||||
|
-
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
|
||||||
|
- route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
|
||||||
|
- route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
|
||||||
|
- route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
|
||||||
|
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
|
||||||
|
- route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
|
||||||
|
+ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||||
|
+ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
|
||||||
|
|
||||||
|
if (ct->status & IPS_SRC_NAT)
|
||||||
|
flow->flags |= FLOW_OFFLOAD_SNAT;
|
|
@ -0,0 +1,80 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Fri, 16 Feb 2018 10:54:24 +0100
|
||||||
|
Subject: [PATCH] ipv6: make ip6_dst_mtu_forward inline
|
||||||
|
|
||||||
|
Removes a direct dependency on ipv6.ko
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/ip6_route.h
|
||||||
|
+++ b/include/net/ip6_route.h
|
||||||
|
@@ -252,4 +252,26 @@ static inline bool rt6_duplicate_nexthop
|
||||||
|
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
|
||||||
|
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
|
||||||
|
+{
|
||||||
|
+ unsigned int mtu;
|
||||||
|
+ struct inet6_dev *idev;
|
||||||
|
+
|
||||||
|
+ if (dst_metric_locked(dst, RTAX_MTU)) {
|
||||||
|
+ mtu = dst_metric_raw(dst, RTAX_MTU);
|
||||||
|
+ if (mtu)
|
||||||
|
+ return mtu;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ mtu = IPV6_MIN_MTU;
|
||||||
|
+ rcu_read_lock();
|
||||||
|
+ idev = __in6_dev_get(dst->dev);
|
||||||
|
+ if (idev)
|
||||||
|
+ mtu = idev->cnf.mtu6;
|
||||||
|
+ rcu_read_unlock();
|
||||||
|
+
|
||||||
|
+ return mtu;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#endif
|
||||||
|
--- a/include/net/ipv6.h
|
||||||
|
+++ b/include/net/ipv6.h
|
||||||
|
@@ -889,8 +889,6 @@ static inline struct sk_buff *ip6_finish
|
||||||
|
&inet6_sk(sk)->cork);
|
||||||
|
}
|
||||||
|
|
||||||
|
-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
|
||||||
|
-
|
||||||
|
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
|
||||||
|
struct flowi6 *fl6);
|
||||||
|
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
|
||||||
|
--- a/net/ipv6/ip6_output.c
|
||||||
|
+++ b/net/ipv6/ip6_output.c
|
||||||
|
@@ -381,28 +381,6 @@ static inline int ip6_forward_finish(str
|
||||||
|
return dst_output(net, sk, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
|
||||||
|
-{
|
||||||
|
- unsigned int mtu;
|
||||||
|
- struct inet6_dev *idev;
|
||||||
|
-
|
||||||
|
- if (dst_metric_locked(dst, RTAX_MTU)) {
|
||||||
|
- mtu = dst_metric_raw(dst, RTAX_MTU);
|
||||||
|
- if (mtu)
|
||||||
|
- return mtu;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- mtu = IPV6_MIN_MTU;
|
||||||
|
- rcu_read_lock();
|
||||||
|
- idev = __in6_dev_get(dst->dev);
|
||||||
|
- if (idev)
|
||||||
|
- mtu = idev->cnf.mtu6;
|
||||||
|
- rcu_read_unlock();
|
||||||
|
-
|
||||||
|
- return mtu;
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
|
||||||
|
-
|
||||||
|
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
{
|
||||||
|
if (skb->len <= mtu)
|
|
@ -0,0 +1,145 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Fri, 16 Feb 2018 10:57:23 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: cache mtu in struct
|
||||||
|
flow_offload_tuple
|
||||||
|
|
||||||
|
Reduces the number of cache lines touched in the offload forwarding path
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -55,6 +55,8 @@ struct flow_offload_tuple {
|
||||||
|
|
||||||
|
int oifidx;
|
||||||
|
|
||||||
|
+ u16 mtu;
|
||||||
|
+
|
||||||
|
struct dst_entry *dst_cache;
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
@@ -177,7 +177,7 @@ static int nf_flow_tuple_ip(struct sk_bu
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Based on ip_exceeds_mtu(). */
|
||||||
|
-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
{
|
||||||
|
if (skb->len <= mtu)
|
||||||
|
return false;
|
||||||
|
@@ -191,17 +191,6 @@ static bool __nf_flow_exceeds_mtu(const
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
|
||||||
|
-{
|
||||||
|
- u32 mtu;
|
||||||
|
-
|
||||||
|
- mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
|
||||||
|
- if (__nf_flow_exceeds_mtu(skb, mtu))
|
||||||
|
- return true;
|
||||||
|
-
|
||||||
|
- return false;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
unsigned int
|
||||||
|
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
@@ -232,9 +221,9 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||||
|
|
||||||
|
dir = tuplehash->tuple.dir;
|
||||||
|
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
-
|
||||||
|
rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
- if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
|
||||||
|
+
|
||||||
|
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||||
|
return NF_ACCEPT;
|
||||||
|
|
||||||
|
if (skb_try_make_writable(skb, sizeof(*iph)))
|
||||||
|
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
@@ -173,7 +173,7 @@ static int nf_flow_tuple_ipv6(struct sk_
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Based on ip_exceeds_mtu(). */
|
||||||
|
-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
{
|
||||||
|
if (skb->len <= mtu)
|
||||||
|
return false;
|
||||||
|
@@ -184,17 +184,6 @@ static bool __nf_flow_exceeds_mtu(const
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
|
||||||
|
-{
|
||||||
|
- u32 mtu;
|
||||||
|
-
|
||||||
|
- mtu = ip6_dst_mtu_forward(&rt->dst);
|
||||||
|
- if (__nf_flow_exceeds_mtu(skb, mtu))
|
||||||
|
- return true;
|
||||||
|
-
|
||||||
|
- return false;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
unsigned int
|
||||||
|
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||||
|
const struct nf_hook_state *state)
|
||||||
|
@@ -225,9 +214,9 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||||
|
|
||||||
|
dir = tuplehash->tuple.dir;
|
||||||
|
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
-
|
||||||
|
rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
- if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
|
||||||
|
+
|
||||||
|
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||||
|
return NF_ACCEPT;
|
||||||
|
|
||||||
|
if (skb_try_make_writable(skb, sizeof(*ip6h)))
|
||||||
|
--- a/net/netfilter/nf_flow_table.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table.c
|
||||||
|
@@ -4,6 +4,8 @@
|
||||||
|
#include <linux/netfilter.h>
|
||||||
|
#include <linux/rhashtable.h>
|
||||||
|
#include <linux/netdevice.h>
|
||||||
|
+#include <net/ip.h>
|
||||||
|
+#include <net/ip6_route.h>
|
||||||
|
#include <net/netfilter/nf_tables.h>
|
||||||
|
#include <net/netfilter/nf_flow_table.h>
|
||||||
|
#include <net/netfilter/nf_conntrack.h>
|
||||||
|
@@ -23,6 +25,7 @@ flow_offload_fill_dir(struct flow_offloa
|
||||||
|
{
|
||||||
|
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
|
||||||
|
struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
|
||||||
|
+ struct dst_entry *dst = route->tuple[dir].dst;
|
||||||
|
|
||||||
|
ft->dir = dir;
|
||||||
|
|
||||||
|
@@ -30,10 +33,12 @@ flow_offload_fill_dir(struct flow_offloa
|
||||||
|
case NFPROTO_IPV4:
|
||||||
|
ft->src_v4 = ctt->src.u3.in;
|
||||||
|
ft->dst_v4 = ctt->dst.u3.in;
|
||||||
|
+ ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
|
||||||
|
break;
|
||||||
|
case NFPROTO_IPV6:
|
||||||
|
ft->src_v6 = ctt->src.u3.in6;
|
||||||
|
ft->dst_v6 = ctt->dst.u3.in6;
|
||||||
|
+ ft->mtu = ip6_dst_mtu_forward(dst);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -44,8 +49,7 @@ flow_offload_fill_dir(struct flow_offloa
|
||||||
|
|
||||||
|
ft->iifidx = route->tuple[dir].ifindex;
|
||||||
|
ft->oifidx = route->tuple[!dir].ifindex;
|
||||||
|
-
|
||||||
|
- ft->dst_cache = route->tuple[dir].dst;
|
||||||
|
+ ft->dst_cache = dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct flow_offload *
|
|
@ -0,0 +1,952 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Fri, 16 Feb 2018 11:08:47 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: rename nf_flow_table.c to
|
||||||
|
nf_flow_table_core.c
|
||||||
|
|
||||||
|
Preparation for adding more code to the same module
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
rename net/netfilter/{nf_flow_table.c => nf_flow_table_core.c} (100%)
|
||||||
|
|
||||||
|
--- a/net/netfilter/Makefile
|
||||||
|
+++ b/net/netfilter/Makefile
|
||||||
|
@@ -113,6 +113,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_
|
||||||
|
|
||||||
|
# flow table infrastructure
|
||||||
|
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
|
||||||
|
+nf_flow_table-objs := nf_flow_table_core.o
|
||||||
|
+
|
||||||
|
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
|
||||||
|
|
||||||
|
# generic X tables
|
||||||
|
--- a/net/netfilter/nf_flow_table.c
|
||||||
|
+++ /dev/null
|
||||||
|
@@ -1,462 +0,0 @@
|
||||||
|
-#include <linux/kernel.h>
|
||||||
|
-#include <linux/init.h>
|
||||||
|
-#include <linux/module.h>
|
||||||
|
-#include <linux/netfilter.h>
|
||||||
|
-#include <linux/rhashtable.h>
|
||||||
|
-#include <linux/netdevice.h>
|
||||||
|
-#include <net/ip.h>
|
||||||
|
-#include <net/ip6_route.h>
|
||||||
|
-#include <net/netfilter/nf_tables.h>
|
||||||
|
-#include <net/netfilter/nf_flow_table.h>
|
||||||
|
-#include <net/netfilter/nf_conntrack.h>
|
||||||
|
-#include <net/netfilter/nf_conntrack_core.h>
|
||||||
|
-#include <net/netfilter/nf_conntrack_tuple.h>
|
||||||
|
-
|
||||||
|
-struct flow_offload_entry {
|
||||||
|
- struct flow_offload flow;
|
||||||
|
- struct nf_conn *ct;
|
||||||
|
- struct rcu_head rcu_head;
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-static void
|
||||||
|
-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
|
||||||
|
- struct nf_flow_route *route,
|
||||||
|
- enum flow_offload_tuple_dir dir)
|
||||||
|
-{
|
||||||
|
- struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
|
||||||
|
- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
|
||||||
|
- struct dst_entry *dst = route->tuple[dir].dst;
|
||||||
|
-
|
||||||
|
- ft->dir = dir;
|
||||||
|
-
|
||||||
|
- switch (ctt->src.l3num) {
|
||||||
|
- case NFPROTO_IPV4:
|
||||||
|
- ft->src_v4 = ctt->src.u3.in;
|
||||||
|
- ft->dst_v4 = ctt->dst.u3.in;
|
||||||
|
- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
|
||||||
|
- break;
|
||||||
|
- case NFPROTO_IPV6:
|
||||||
|
- ft->src_v6 = ctt->src.u3.in6;
|
||||||
|
- ft->dst_v6 = ctt->dst.u3.in6;
|
||||||
|
- ft->mtu = ip6_dst_mtu_forward(dst);
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- ft->l3proto = ctt->src.l3num;
|
||||||
|
- ft->l4proto = ctt->dst.protonum;
|
||||||
|
- ft->src_port = ctt->src.u.tcp.port;
|
||||||
|
- ft->dst_port = ctt->dst.u.tcp.port;
|
||||||
|
-
|
||||||
|
- ft->iifidx = route->tuple[dir].ifindex;
|
||||||
|
- ft->oifidx = route->tuple[!dir].ifindex;
|
||||||
|
- ft->dst_cache = dst;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-struct flow_offload *
|
||||||
|
-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
|
||||||
|
-{
|
||||||
|
- struct flow_offload_entry *entry;
|
||||||
|
- struct flow_offload *flow;
|
||||||
|
-
|
||||||
|
- if (unlikely(nf_ct_is_dying(ct) ||
|
||||||
|
- !atomic_inc_not_zero(&ct->ct_general.use)))
|
||||||
|
- return NULL;
|
||||||
|
-
|
||||||
|
- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
|
||||||
|
- if (!entry)
|
||||||
|
- goto err_ct_refcnt;
|
||||||
|
-
|
||||||
|
- flow = &entry->flow;
|
||||||
|
-
|
||||||
|
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
|
||||||
|
- goto err_dst_cache_original;
|
||||||
|
-
|
||||||
|
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
|
||||||
|
- goto err_dst_cache_reply;
|
||||||
|
-
|
||||||
|
- entry->ct = ct;
|
||||||
|
-
|
||||||
|
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||||
|
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
|
||||||
|
-
|
||||||
|
- if (ct->status & IPS_SRC_NAT)
|
||||||
|
- flow->flags |= FLOW_OFFLOAD_SNAT;
|
||||||
|
- else if (ct->status & IPS_DST_NAT)
|
||||||
|
- flow->flags |= FLOW_OFFLOAD_DNAT;
|
||||||
|
-
|
||||||
|
- return flow;
|
||||||
|
-
|
||||||
|
-err_dst_cache_reply:
|
||||||
|
- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
|
||||||
|
-err_dst_cache_original:
|
||||||
|
- kfree(entry);
|
||||||
|
-err_ct_refcnt:
|
||||||
|
- nf_ct_put(ct);
|
||||||
|
-
|
||||||
|
- return NULL;
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(flow_offload_alloc);
|
||||||
|
-
|
||||||
|
-void flow_offload_free(struct flow_offload *flow)
|
||||||
|
-{
|
||||||
|
- struct flow_offload_entry *e;
|
||||||
|
-
|
||||||
|
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
|
||||||
|
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
|
||||||
|
- e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
- nf_ct_delete(e->ct, 0, 0);
|
||||||
|
- nf_ct_put(e->ct);
|
||||||
|
- kfree_rcu(e, rcu_head);
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(flow_offload_free);
|
||||||
|
-
|
||||||
|
-void flow_offload_dead(struct flow_offload *flow)
|
||||||
|
-{
|
||||||
|
- flow->flags |= FLOW_OFFLOAD_DYING;
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(flow_offload_dead);
|
||||||
|
-
|
||||||
|
-int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
|
||||||
|
-{
|
||||||
|
- flow->timeout = (u32)jiffies;
|
||||||
|
-
|
||||||
|
- rhashtable_insert_fast(&flow_table->rhashtable,
|
||||||
|
- &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
- rhashtable_insert_fast(&flow_table->rhashtable,
|
||||||
|
- &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(flow_offload_add);
|
||||||
|
-
|
||||||
|
-static void flow_offload_del(struct nf_flowtable *flow_table,
|
||||||
|
- struct flow_offload *flow)
|
||||||
|
-{
|
||||||
|
- rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
- &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
- rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
- &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
-
|
||||||
|
- flow_offload_free(flow);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-struct flow_offload_tuple_rhash *
|
||||||
|
-flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||||
|
- struct flow_offload_tuple *tuple)
|
||||||
|
-{
|
||||||
|
- return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(flow_offload_lookup);
|
||||||
|
-
|
||||||
|
-int nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||||
|
- void (*iter)(struct flow_offload *flow, void *data),
|
||||||
|
- void *data)
|
||||||
|
-{
|
||||||
|
- struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
- struct rhashtable_iter hti;
|
||||||
|
- struct flow_offload *flow;
|
||||||
|
- int err;
|
||||||
|
-
|
||||||
|
- err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
|
||||||
|
- if (err)
|
||||||
|
- return err;
|
||||||
|
-
|
||||||
|
- rhashtable_walk_start(&hti);
|
||||||
|
-
|
||||||
|
- while ((tuplehash = rhashtable_walk_next(&hti))) {
|
||||||
|
- if (IS_ERR(tuplehash)) {
|
||||||
|
- err = PTR_ERR(tuplehash);
|
||||||
|
- if (err != -EAGAIN)
|
||||||
|
- goto out;
|
||||||
|
-
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
- if (tuplehash->tuple.dir)
|
||||||
|
- continue;
|
||||||
|
-
|
||||||
|
- flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
|
||||||
|
-
|
||||||
|
- iter(flow, data);
|
||||||
|
- }
|
||||||
|
-out:
|
||||||
|
- rhashtable_walk_stop(&hti);
|
||||||
|
- rhashtable_walk_exit(&hti);
|
||||||
|
-
|
||||||
|
- return err;
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
|
||||||
|
-
|
||||||
|
-static inline bool nf_flow_has_expired(const struct flow_offload *flow)
|
||||||
|
-{
|
||||||
|
- return (__s32)(flow->timeout - (u32)jiffies) <= 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
|
||||||
|
-{
|
||||||
|
- return flow->flags & FLOW_OFFLOAD_DYING;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
|
||||||
|
-{
|
||||||
|
- struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
- struct rhashtable_iter hti;
|
||||||
|
- struct flow_offload *flow;
|
||||||
|
- int err;
|
||||||
|
-
|
||||||
|
- err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
|
||||||
|
- if (err)
|
||||||
|
- return 0;
|
||||||
|
-
|
||||||
|
- rhashtable_walk_start(&hti);
|
||||||
|
-
|
||||||
|
- while ((tuplehash = rhashtable_walk_next(&hti))) {
|
||||||
|
- if (IS_ERR(tuplehash)) {
|
||||||
|
- err = PTR_ERR(tuplehash);
|
||||||
|
- if (err != -EAGAIN)
|
||||||
|
- goto out;
|
||||||
|
-
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
- if (tuplehash->tuple.dir)
|
||||||
|
- continue;
|
||||||
|
-
|
||||||
|
- flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
|
||||||
|
-
|
||||||
|
- if (nf_flow_has_expired(flow) ||
|
||||||
|
- nf_flow_is_dying(flow))
|
||||||
|
- flow_offload_del(flow_table, flow);
|
||||||
|
- }
|
||||||
|
-out:
|
||||||
|
- rhashtable_walk_stop(&hti);
|
||||||
|
- rhashtable_walk_exit(&hti);
|
||||||
|
-
|
||||||
|
- return 1;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-void nf_flow_offload_work_gc(struct work_struct *work)
|
||||||
|
-{
|
||||||
|
- struct nf_flowtable *flow_table;
|
||||||
|
-
|
||||||
|
- flow_table = container_of(work, struct nf_flowtable, gc_work.work);
|
||||||
|
- nf_flow_offload_gc_step(flow_table);
|
||||||
|
- queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
|
||||||
|
-
|
||||||
|
-static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
|
||||||
|
-{
|
||||||
|
- const struct flow_offload_tuple *tuple = data;
|
||||||
|
-
|
||||||
|
- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
|
||||||
|
-{
|
||||||
|
- const struct flow_offload_tuple_rhash *tuplehash = data;
|
||||||
|
-
|
||||||
|
- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
|
||||||
|
- const void *ptr)
|
||||||
|
-{
|
||||||
|
- const struct flow_offload_tuple *tuple = arg->key;
|
||||||
|
- const struct flow_offload_tuple_rhash *x = ptr;
|
||||||
|
-
|
||||||
|
- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
|
||||||
|
- return 1;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-const struct rhashtable_params nf_flow_offload_rhash_params = {
|
||||||
|
- .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
|
||||||
|
- .hashfn = flow_offload_hash,
|
||||||
|
- .obj_hashfn = flow_offload_hash_obj,
|
||||||
|
- .obj_cmpfn = flow_offload_hash_cmp,
|
||||||
|
- .automatic_shrinking = true,
|
||||||
|
-};
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
- __be16 port, __be16 new_port)
|
||||||
|
-{
|
||||||
|
- struct tcphdr *tcph;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
|
||||||
|
- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
- inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
- __be16 port, __be16 new_port)
|
||||||
|
-{
|
||||||
|
- struct udphdr *udph;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
|
||||||
|
- skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- udph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
- if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||||
|
- inet_proto_csum_replace2(&udph->check, skb, port,
|
||||||
|
- new_port, true);
|
||||||
|
- if (!udph->check)
|
||||||
|
- udph->check = CSUM_MANGLED_0;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
- u8 protocol, __be16 port, __be16 new_port)
|
||||||
|
-{
|
||||||
|
- switch (protocol) {
|
||||||
|
- case IPPROTO_TCP:
|
||||||
|
- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
|
||||||
|
- return NF_DROP;
|
||||||
|
- break;
|
||||||
|
- case IPPROTO_UDP:
|
||||||
|
- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
|
||||||
|
- return NF_DROP;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-int nf_flow_snat_port(const struct flow_offload *flow,
|
||||||
|
- struct sk_buff *skb, unsigned int thoff,
|
||||||
|
- u8 protocol, enum flow_offload_tuple_dir dir)
|
||||||
|
-{
|
||||||
|
- struct flow_ports *hdr;
|
||||||
|
- __be16 port, new_port;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
|
||||||
|
- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- hdr = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
-
|
||||||
|
- switch (dir) {
|
||||||
|
- case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
- port = hdr->source;
|
||||||
|
- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
|
||||||
|
- hdr->source = new_port;
|
||||||
|
- break;
|
||||||
|
- case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
- port = hdr->dest;
|
||||||
|
- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
|
||||||
|
- hdr->dest = new_port;
|
||||||
|
- break;
|
||||||
|
- default:
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_snat_port);
|
||||||
|
-
|
||||||
|
-int nf_flow_dnat_port(const struct flow_offload *flow,
|
||||||
|
- struct sk_buff *skb, unsigned int thoff,
|
||||||
|
- u8 protocol, enum flow_offload_tuple_dir dir)
|
||||||
|
-{
|
||||||
|
- struct flow_ports *hdr;
|
||||||
|
- __be16 port, new_port;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
|
||||||
|
- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- hdr = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
-
|
||||||
|
- switch (dir) {
|
||||||
|
- case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
- port = hdr->dest;
|
||||||
|
- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
|
||||||
|
- hdr->dest = new_port;
|
||||||
|
- break;
|
||||||
|
- case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
- port = hdr->source;
|
||||||
|
- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
|
||||||
|
- hdr->source = new_port;
|
||||||
|
- break;
|
||||||
|
- default:
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
|
||||||
|
-
|
||||||
|
-static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
|
||||||
|
-{
|
||||||
|
- struct net_device *dev = data;
|
||||||
|
-
|
||||||
|
- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
|
||||||
|
- return;
|
||||||
|
-
|
||||||
|
- flow_offload_dead(flow);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
|
||||||
|
- void *data)
|
||||||
|
-{
|
||||||
|
- nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
|
||||||
|
- flush_delayed_work(&flowtable->gc_work);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
|
||||||
|
-{
|
||||||
|
- nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
|
||||||
|
-
|
||||||
|
-void nf_flow_table_free(struct nf_flowtable *flow_table)
|
||||||
|
-{
|
||||||
|
- nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
|
||||||
|
- WARN_ON(!nf_flow_offload_gc_step(flow_table));
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_table_free);
|
||||||
|
-
|
||||||
|
-static int nf_flow_table_netdev_event(struct notifier_block *this,
|
||||||
|
- unsigned long event, void *ptr)
|
||||||
|
-{
|
||||||
|
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||||
|
-
|
||||||
|
- if (event != NETDEV_DOWN)
|
||||||
|
- return NOTIFY_DONE;
|
||||||
|
-
|
||||||
|
- nf_flow_table_cleanup(dev_net(dev), dev);
|
||||||
|
-
|
||||||
|
- return NOTIFY_DONE;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static struct notifier_block flow_offload_netdev_notifier = {
|
||||||
|
- .notifier_call = nf_flow_table_netdev_event,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-static int __init nf_flow_table_module_init(void)
|
||||||
|
-{
|
||||||
|
- return register_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void __exit nf_flow_table_module_exit(void)
|
||||||
|
-{
|
||||||
|
- unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-module_init(nf_flow_table_module_init);
|
||||||
|
-module_exit(nf_flow_table_module_exit);
|
||||||
|
-
|
||||||
|
-MODULE_LICENSE("GPL");
|
||||||
|
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/netfilter/nf_flow_table_core.c
|
||||||
|
@@ -0,0 +1,462 @@
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/init.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/rhashtable.h>
|
||||||
|
+#include <linux/netdevice.h>
|
||||||
|
+#include <net/ip.h>
|
||||||
|
+#include <net/ip6_route.h>
|
||||||
|
+#include <net/netfilter/nf_tables.h>
|
||||||
|
+#include <net/netfilter/nf_flow_table.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_core.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_tuple.h>
|
||||||
|
+
|
||||||
|
+struct flow_offload_entry {
|
||||||
|
+ struct flow_offload flow;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ struct rcu_head rcu_head;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
|
||||||
|
+ struct nf_flow_route *route,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
|
||||||
|
+ struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
|
||||||
|
+ struct dst_entry *dst = route->tuple[dir].dst;
|
||||||
|
+
|
||||||
|
+ ft->dir = dir;
|
||||||
|
+
|
||||||
|
+ switch (ctt->src.l3num) {
|
||||||
|
+ case NFPROTO_IPV4:
|
||||||
|
+ ft->src_v4 = ctt->src.u3.in;
|
||||||
|
+ ft->dst_v4 = ctt->dst.u3.in;
|
||||||
|
+ ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
|
||||||
|
+ break;
|
||||||
|
+ case NFPROTO_IPV6:
|
||||||
|
+ ft->src_v6 = ctt->src.u3.in6;
|
||||||
|
+ ft->dst_v6 = ctt->dst.u3.in6;
|
||||||
|
+ ft->mtu = ip6_dst_mtu_forward(dst);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ft->l3proto = ctt->src.l3num;
|
||||||
|
+ ft->l4proto = ctt->dst.protonum;
|
||||||
|
+ ft->src_port = ctt->src.u.tcp.port;
|
||||||
|
+ ft->dst_port = ctt->dst.u.tcp.port;
|
||||||
|
+
|
||||||
|
+ ft->iifidx = route->tuple[dir].ifindex;
|
||||||
|
+ ft->oifidx = route->tuple[!dir].ifindex;
|
||||||
|
+ ft->dst_cache = dst;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+struct flow_offload *
|
||||||
|
+flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_entry *entry;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+
|
||||||
|
+ if (unlikely(nf_ct_is_dying(ct) ||
|
||||||
|
+ !atomic_inc_not_zero(&ct->ct_general.use)))
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
|
||||||
|
+ if (!entry)
|
||||||
|
+ goto err_ct_refcnt;
|
||||||
|
+
|
||||||
|
+ flow = &entry->flow;
|
||||||
|
+
|
||||||
|
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
|
||||||
|
+ goto err_dst_cache_original;
|
||||||
|
+
|
||||||
|
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
|
||||||
|
+ goto err_dst_cache_reply;
|
||||||
|
+
|
||||||
|
+ entry->ct = ct;
|
||||||
|
+
|
||||||
|
+ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
|
||||||
|
+ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
|
||||||
|
+
|
||||||
|
+ if (ct->status & IPS_SRC_NAT)
|
||||||
|
+ flow->flags |= FLOW_OFFLOAD_SNAT;
|
||||||
|
+ else if (ct->status & IPS_DST_NAT)
|
||||||
|
+ flow->flags |= FLOW_OFFLOAD_DNAT;
|
||||||
|
+
|
||||||
|
+ return flow;
|
||||||
|
+
|
||||||
|
+err_dst_cache_reply:
|
||||||
|
+ dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
|
||||||
|
+err_dst_cache_original:
|
||||||
|
+ kfree(entry);
|
||||||
|
+err_ct_refcnt:
|
||||||
|
+ nf_ct_put(ct);
|
||||||
|
+
|
||||||
|
+ return NULL;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_alloc);
|
||||||
|
+
|
||||||
|
+void flow_offload_free(struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_entry *e;
|
||||||
|
+
|
||||||
|
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
|
||||||
|
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
|
||||||
|
+ e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
+ nf_ct_delete(e->ct, 0, 0);
|
||||||
|
+ nf_ct_put(e->ct);
|
||||||
|
+ kfree_rcu(e, rcu_head);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_free);
|
||||||
|
+
|
||||||
|
+void flow_offload_dead(struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ flow->flags |= FLOW_OFFLOAD_DYING;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_dead);
|
||||||
|
+
|
||||||
|
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ flow->timeout = (u32)jiffies;
|
||||||
|
+
|
||||||
|
+ rhashtable_insert_fast(&flow_table->rhashtable,
|
||||||
|
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+ rhashtable_insert_fast(&flow_table->rhashtable,
|
||||||
|
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_add);
|
||||||
|
+
|
||||||
|
+static void flow_offload_del(struct nf_flowtable *flow_table,
|
||||||
|
+ struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+ rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+
|
||||||
|
+ flow_offload_free(flow);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+struct flow_offload_tuple_rhash *
|
||||||
|
+flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||||
|
+ struct flow_offload_tuple *tuple)
|
||||||
|
+{
|
||||||
|
+ return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
|
||||||
|
+ *flow_table->type->params);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_lookup);
|
||||||
|
+
|
||||||
|
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
|
||||||
|
+ void (*iter)(struct flow_offload *flow, void *data),
|
||||||
|
+ void *data)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
+ struct rhashtable_iter hti;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
|
||||||
|
+ if (err)
|
||||||
|
+ return err;
|
||||||
|
+
|
||||||
|
+ rhashtable_walk_start(&hti);
|
||||||
|
+
|
||||||
|
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
|
||||||
|
+ if (IS_ERR(tuplehash)) {
|
||||||
|
+ err = PTR_ERR(tuplehash);
|
||||||
|
+ if (err != -EAGAIN)
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ if (tuplehash->tuple.dir)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
|
||||||
|
+
|
||||||
|
+ iter(flow, data);
|
||||||
|
+ }
|
||||||
|
+out:
|
||||||
|
+ rhashtable_walk_stop(&hti);
|
||||||
|
+ rhashtable_walk_exit(&hti);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
|
||||||
|
+
|
||||||
|
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ return (__s32)(flow->timeout - (u32)jiffies) <= 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline bool nf_flow_is_dying(const struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ return flow->flags & FLOW_OFFLOAD_DYING;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
+ struct rhashtable_iter hti;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
|
||||||
|
+ if (err)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ rhashtable_walk_start(&hti);
|
||||||
|
+
|
||||||
|
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
|
||||||
|
+ if (IS_ERR(tuplehash)) {
|
||||||
|
+ err = PTR_ERR(tuplehash);
|
||||||
|
+ if (err != -EAGAIN)
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ if (tuplehash->tuple.dir)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
|
||||||
|
+
|
||||||
|
+ if (nf_flow_has_expired(flow) ||
|
||||||
|
+ nf_flow_is_dying(flow))
|
||||||
|
+ flow_offload_del(flow_table, flow);
|
||||||
|
+ }
|
||||||
|
+out:
|
||||||
|
+ rhashtable_walk_stop(&hti);
|
||||||
|
+ rhashtable_walk_exit(&hti);
|
||||||
|
+
|
||||||
|
+ return 1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void nf_flow_offload_work_gc(struct work_struct *work)
|
||||||
|
+{
|
||||||
|
+ struct nf_flowtable *flow_table;
|
||||||
|
+
|
||||||
|
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
|
||||||
|
+ nf_flow_offload_gc_step(flow_table);
|
||||||
|
+ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
|
||||||
|
+
|
||||||
|
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
|
||||||
|
+{
|
||||||
|
+ const struct flow_offload_tuple *tuple = data;
|
||||||
|
+
|
||||||
|
+ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
|
||||||
|
+{
|
||||||
|
+ const struct flow_offload_tuple_rhash *tuplehash = data;
|
||||||
|
+
|
||||||
|
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
|
||||||
|
+ const void *ptr)
|
||||||
|
+{
|
||||||
|
+ const struct flow_offload_tuple *tuple = arg->key;
|
||||||
|
+ const struct flow_offload_tuple_rhash *x = ptr;
|
||||||
|
+
|
||||||
|
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+const struct rhashtable_params nf_flow_offload_rhash_params = {
|
||||||
|
+ .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
|
||||||
|
+ .hashfn = flow_offload_hash,
|
||||||
|
+ .obj_hashfn = flow_offload_hash_obj,
|
||||||
|
+ .obj_cmpfn = flow_offload_hash_cmp,
|
||||||
|
+ .automatic_shrinking = true,
|
||||||
|
+};
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ __be16 port, __be16 new_port)
|
||||||
|
+{
|
||||||
|
+ struct tcphdr *tcph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ __be16 port, __be16 new_port)
|
||||||
|
+{
|
||||||
|
+ struct udphdr *udph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ udph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||||
|
+ inet_proto_csum_replace2(&udph->check, skb, port,
|
||||||
|
+ new_port, true);
|
||||||
|
+ if (!udph->check)
|
||||||
|
+ udph->check = CSUM_MANGLED_0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ u8 protocol, __be16 port, __be16 new_port)
|
||||||
|
+{
|
||||||
|
+ switch (protocol) {
|
||||||
|
+ case IPPROTO_TCP:
|
||||||
|
+ if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ case IPPROTO_UDP:
|
||||||
|
+ if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int nf_flow_snat_port(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ u8 protocol, enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct flow_ports *hdr;
|
||||||
|
+ __be16 port, new_port;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ hdr = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ port = hdr->source;
|
||||||
|
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
|
||||||
|
+ hdr->source = new_port;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ port = hdr->dest;
|
||||||
|
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
|
||||||
|
+ hdr->dest = new_port;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_snat_port);
|
||||||
|
+
|
||||||
|
+int nf_flow_dnat_port(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ u8 protocol, enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct flow_ports *hdr;
|
||||||
|
+ __be16 port, new_port;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ hdr = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ port = hdr->dest;
|
||||||
|
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
|
||||||
|
+ hdr->dest = new_port;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ port = hdr->source;
|
||||||
|
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
|
||||||
|
+ hdr->source = new_port;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
|
||||||
|
+
|
||||||
|
+static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
|
||||||
|
+{
|
||||||
|
+ struct net_device *dev = data;
|
||||||
|
+
|
||||||
|
+ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ flow_offload_dead(flow);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
|
||||||
|
+ void *data)
|
||||||
|
+{
|
||||||
|
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
|
||||||
|
+ flush_delayed_work(&flowtable->gc_work);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
|
||||||
|
+{
|
||||||
|
+ nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
|
||||||
|
+
|
||||||
|
+void nf_flow_table_free(struct nf_flowtable *flow_table)
|
||||||
|
+{
|
||||||
|
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
|
||||||
|
+ WARN_ON(!nf_flow_offload_gc_step(flow_table));
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_table_free);
|
||||||
|
+
|
||||||
|
+static int nf_flow_table_netdev_event(struct notifier_block *this,
|
||||||
|
+ unsigned long event, void *ptr)
|
||||||
|
+{
|
||||||
|
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||||
|
+
|
||||||
|
+ if (event != NETDEV_DOWN)
|
||||||
|
+ return NOTIFY_DONE;
|
||||||
|
+
|
||||||
|
+ nf_flow_table_cleanup(dev_net(dev), dev);
|
||||||
|
+
|
||||||
|
+ return NOTIFY_DONE;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct notifier_block flow_offload_netdev_notifier = {
|
||||||
|
+ .notifier_call = nf_flow_table_netdev_event,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int __init nf_flow_table_module_init(void)
|
||||||
|
+{
|
||||||
|
+ return register_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __exit nf_flow_table_module_exit(void)
|
||||||
|
+{
|
||||||
|
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+module_init(nf_flow_table_module_init);
|
||||||
|
+module_exit(nf_flow_table_module_exit);
|
||||||
|
+
|
||||||
|
+MODULE_LICENSE("GPL");
|
||||||
|
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
|
|
@ -0,0 +1,522 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sat, 17 Feb 2018 11:49:44 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: move ipv4 offload hook code to
|
||||||
|
nf_flow_table
|
||||||
|
|
||||||
|
Allows some minor code sharing with the ipv6 hook code and is also
|
||||||
|
useful as preparation for adding iptables support for offload
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
create mode 100644 net/netfilter/nf_flow_table_ip.c
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
@@ -2,248 +2,8 @@
|
||||||
|
#include <linux/init.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/netfilter.h>
|
||||||
|
-#include <linux/rhashtable.h>
|
||||||
|
-#include <linux/ip.h>
|
||||||
|
-#include <linux/netdevice.h>
|
||||||
|
-#include <net/ip.h>
|
||||||
|
-#include <net/neighbour.h>
|
||||||
|
#include <net/netfilter/nf_flow_table.h>
|
||||||
|
#include <net/netfilter/nf_tables.h>
|
||||||
|
-/* For layer 4 checksum field offset. */
|
||||||
|
-#include <linux/tcp.h>
|
||||||
|
-#include <linux/udp.h>
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
- __be32 addr, __be32 new_addr)
|
||||||
|
-{
|
||||||
|
- struct tcphdr *tcph;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
|
||||||
|
- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
- inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
- __be32 addr, __be32 new_addr)
|
||||||
|
-{
|
||||||
|
- struct udphdr *udph;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
|
||||||
|
- skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- udph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
- if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||||
|
- inet_proto_csum_replace4(&udph->check, skb, addr,
|
||||||
|
- new_addr, true);
|
||||||
|
- if (!udph->check)
|
||||||
|
- udph->check = CSUM_MANGLED_0;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
|
||||||
|
- unsigned int thoff, __be32 addr,
|
||||||
|
- __be32 new_addr)
|
||||||
|
-{
|
||||||
|
- switch (iph->protocol) {
|
||||||
|
- case IPPROTO_TCP:
|
||||||
|
- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
- return NF_DROP;
|
||||||
|
- break;
|
||||||
|
- case IPPROTO_UDP:
|
||||||
|
- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
- return NF_DROP;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
- struct iphdr *iph, unsigned int thoff,
|
||||||
|
- enum flow_offload_tuple_dir dir)
|
||||||
|
-{
|
||||||
|
- __be32 addr, new_addr;
|
||||||
|
-
|
||||||
|
- switch (dir) {
|
||||||
|
- case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
- addr = iph->saddr;
|
||||||
|
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
|
||||||
|
- iph->saddr = new_addr;
|
||||||
|
- break;
|
||||||
|
- case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
- addr = iph->daddr;
|
||||||
|
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
|
||||||
|
- iph->daddr = new_addr;
|
||||||
|
- break;
|
||||||
|
- default:
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
- csum_replace4(&iph->check, addr, new_addr);
|
||||||
|
-
|
||||||
|
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
- struct iphdr *iph, unsigned int thoff,
|
||||||
|
- enum flow_offload_tuple_dir dir)
|
||||||
|
-{
|
||||||
|
- __be32 addr, new_addr;
|
||||||
|
-
|
||||||
|
- switch (dir) {
|
||||||
|
- case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
- addr = iph->daddr;
|
||||||
|
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
|
||||||
|
- iph->daddr = new_addr;
|
||||||
|
- break;
|
||||||
|
- case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
- addr = iph->saddr;
|
||||||
|
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
|
||||||
|
- iph->saddr = new_addr;
|
||||||
|
- break;
|
||||||
|
- default:
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
- enum flow_offload_tuple_dir dir)
|
||||||
|
-{
|
||||||
|
- struct iphdr *iph = ip_hdr(skb);
|
||||||
|
- unsigned int thoff = iph->ihl * 4;
|
||||||
|
-
|
||||||
|
- if (flow->flags & FLOW_OFFLOAD_SNAT &&
|
||||||
|
- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||||
|
- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||||
|
- return -1;
|
||||||
|
- if (flow->flags & FLOW_OFFLOAD_DNAT &&
|
||||||
|
- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||||
|
- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static bool ip_has_options(unsigned int thoff)
|
||||||
|
-{
|
||||||
|
- return thoff != sizeof(struct iphdr);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
|
||||||
|
- struct flow_offload_tuple *tuple)
|
||||||
|
-{
|
||||||
|
- struct flow_ports *ports;
|
||||||
|
- unsigned int thoff;
|
||||||
|
- struct iphdr *iph;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, sizeof(*iph)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- iph = ip_hdr(skb);
|
||||||
|
- thoff = iph->ihl * 4;
|
||||||
|
-
|
||||||
|
- if (ip_is_fragment(iph) ||
|
||||||
|
- unlikely(ip_has_options(thoff)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- if (iph->protocol != IPPROTO_TCP &&
|
||||||
|
- iph->protocol != IPPROTO_UDP)
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- thoff = iph->ihl * 4;
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||||
|
-
|
||||||
|
- tuple->src_v4.s_addr = iph->saddr;
|
||||||
|
- tuple->dst_v4.s_addr = iph->daddr;
|
||||||
|
- tuple->src_port = ports->source;
|
||||||
|
- tuple->dst_port = ports->dest;
|
||||||
|
- tuple->l3proto = AF_INET;
|
||||||
|
- tuple->l4proto = iph->protocol;
|
||||||
|
- tuple->iifidx = dev->ifindex;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-/* Based on ip_exceeds_mtu(). */
|
||||||
|
-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
-{
|
||||||
|
- if (skb->len <= mtu)
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
- if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
- return true;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-unsigned int
|
||||||
|
-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
-{
|
||||||
|
- struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
- struct nf_flowtable *flow_table = priv;
|
||||||
|
- struct flow_offload_tuple tuple = {};
|
||||||
|
- enum flow_offload_tuple_dir dir;
|
||||||
|
- struct flow_offload *flow;
|
||||||
|
- struct net_device *outdev;
|
||||||
|
- const struct rtable *rt;
|
||||||
|
- struct iphdr *iph;
|
||||||
|
- __be32 nexthop;
|
||||||
|
-
|
||||||
|
- if (skb->protocol != htons(ETH_P_IP))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||||
|
- if (tuplehash == NULL)
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
|
||||||
|
- if (!outdev)
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- dir = tuplehash->tuple.dir;
|
||||||
|
- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
-
|
||||||
|
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- if (skb_try_make_writable(skb, sizeof(*iph)))
|
||||||
|
- return NF_DROP;
|
||||||
|
-
|
||||||
|
- if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
|
||||||
|
- nf_flow_nat_ip(flow, skb, dir) < 0)
|
||||||
|
- return NF_DROP;
|
||||||
|
-
|
||||||
|
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
|
||||||
|
- iph = ip_hdr(skb);
|
||||||
|
- ip_decrease_ttl(iph);
|
||||||
|
-
|
||||||
|
- skb->dev = outdev;
|
||||||
|
- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||||
|
- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
|
||||||
|
-
|
||||||
|
- return NF_STOLEN;
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
|
||||||
|
|
||||||
|
static struct nf_flowtable_type flowtable_ipv4 = {
|
||||||
|
.family = NFPROTO_IPV4,
|
||||||
|
--- a/net/netfilter/Makefile
|
||||||
|
+++ b/net/netfilter/Makefile
|
||||||
|
@@ -113,7 +113,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_
|
||||||
|
|
||||||
|
# flow table infrastructure
|
||||||
|
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
|
||||||
|
-nf_flow_table-objs := nf_flow_table_core.o
|
||||||
|
+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
|
||||||
|
|
||||||
|
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
|
||||||
|
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||||
|
@@ -0,0 +1,245 @@
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/init.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/rhashtable.h>
|
||||||
|
+#include <linux/ip.h>
|
||||||
|
+#include <linux/netdevice.h>
|
||||||
|
+#include <net/ip.h>
|
||||||
|
+#include <net/neighbour.h>
|
||||||
|
+#include <net/netfilter/nf_flow_table.h>
|
||||||
|
+/* For layer 4 checksum field offset. */
|
||||||
|
+#include <linux/tcp.h>
|
||||||
|
+#include <linux/udp.h>
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ __be32 addr, __be32 new_addr)
|
||||||
|
+{
|
||||||
|
+ struct tcphdr *tcph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ __be32 addr, __be32 new_addr)
|
||||||
|
+{
|
||||||
|
+ struct udphdr *udph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ udph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||||
|
+ inet_proto_csum_replace4(&udph->check, skb, addr,
|
||||||
|
+ new_addr, true);
|
||||||
|
+ if (!udph->check)
|
||||||
|
+ udph->check = CSUM_MANGLED_0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
|
||||||
|
+ unsigned int thoff, __be32 addr,
|
||||||
|
+ __be32 new_addr)
|
||||||
|
+{
|
||||||
|
+ switch (iph->protocol) {
|
||||||
|
+ case IPPROTO_TCP:
|
||||||
|
+ if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ case IPPROTO_UDP:
|
||||||
|
+ if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
+ struct iphdr *iph, unsigned int thoff,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ __be32 addr, new_addr;
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ addr = iph->saddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
|
||||||
|
+ iph->saddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ addr = iph->daddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
|
||||||
|
+ iph->daddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ csum_replace4(&iph->check, addr, new_addr);
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
+ struct iphdr *iph, unsigned int thoff,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ __be32 addr, new_addr;
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ addr = iph->daddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
|
||||||
|
+ iph->daddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ addr = iph->saddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
|
||||||
|
+ iph->saddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct iphdr *iph = ip_hdr(skb);
|
||||||
|
+ unsigned int thoff = iph->ihl * 4;
|
||||||
|
+
|
||||||
|
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
|
||||||
|
+ (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||||
|
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||||
|
+ return -1;
|
||||||
|
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
|
||||||
|
+ (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||||
|
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool ip_has_options(unsigned int thoff)
|
||||||
|
+{
|
||||||
|
+ return thoff != sizeof(struct iphdr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
|
||||||
|
+ struct flow_offload_tuple *tuple)
|
||||||
|
+{
|
||||||
|
+ struct flow_ports *ports;
|
||||||
|
+ unsigned int thoff;
|
||||||
|
+ struct iphdr *iph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, sizeof(*iph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ iph = ip_hdr(skb);
|
||||||
|
+ thoff = iph->ihl * 4;
|
||||||
|
+
|
||||||
|
+ if (ip_is_fragment(iph) ||
|
||||||
|
+ unlikely(ip_has_options(thoff)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ if (iph->protocol != IPPROTO_TCP &&
|
||||||
|
+ iph->protocol != IPPROTO_UDP)
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ thoff = iph->ihl * 4;
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||||
|
+
|
||||||
|
+ tuple->src_v4.s_addr = iph->saddr;
|
||||||
|
+ tuple->dst_v4.s_addr = iph->daddr;
|
||||||
|
+ tuple->src_port = ports->source;
|
||||||
|
+ tuple->dst_port = ports->dest;
|
||||||
|
+ tuple->l3proto = AF_INET;
|
||||||
|
+ tuple->l4proto = iph->protocol;
|
||||||
|
+ tuple->iifidx = dev->ifindex;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Based on ip_exceeds_mtu(). */
|
||||||
|
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
+{
|
||||||
|
+ if (skb->len <= mtu)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned int
|
||||||
|
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
+ struct nf_flowtable *flow_table = priv;
|
||||||
|
+ struct flow_offload_tuple tuple = {};
|
||||||
|
+ enum flow_offload_tuple_dir dir;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ struct net_device *outdev;
|
||||||
|
+ const struct rtable *rt;
|
||||||
|
+ struct iphdr *iph;
|
||||||
|
+ __be32 nexthop;
|
||||||
|
+
|
||||||
|
+ if (skb->protocol != htons(ETH_P_IP))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||||
|
+ if (tuplehash == NULL)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
|
||||||
|
+ if (!outdev)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ dir = tuplehash->tuple.dir;
|
||||||
|
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
+ rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
+
|
||||||
|
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (skb_try_make_writable(skb, sizeof(*iph)))
|
||||||
|
+ return NF_DROP;
|
||||||
|
+
|
||||||
|
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
|
||||||
|
+ nf_flow_nat_ip(flow, skb, dir) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+
|
||||||
|
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
|
||||||
|
+ iph = ip_hdr(skb);
|
||||||
|
+ ip_decrease_ttl(iph);
|
||||||
|
+
|
||||||
|
+ skb->dev = outdev;
|
||||||
|
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||||
|
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
|
||||||
|
+
|
||||||
|
+ return NF_STOLEN;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
|
|
@ -0,0 +1,32 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sat, 17 Feb 2018 11:51:20 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: move ip header check out of
|
||||||
|
nf_flow_exceeds_mtu
|
||||||
|
|
||||||
|
Allows the function to be shared with the IPv6 hook code
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table_ip.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||||
|
@@ -181,9 +181,6 @@ static bool nf_flow_exceeds_mtu(const st
|
||||||
|
if (skb->len <= mtu)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
- if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
@@ -222,7 +219,8 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||||
|
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
|
||||||
|
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||||
|
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
|
||||||
|
+ (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
|
||||||
|
return NF_ACCEPT;
|
||||||
|
|
||||||
|
if (skb_try_make_writable(skb, sizeof(*iph)))
|
|
@ -0,0 +1,483 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sat, 17 Feb 2018 11:55:51 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: move ipv6 offload hook code to
|
||||||
|
nf_flow_table
|
||||||
|
|
||||||
|
Useful as preparation for adding iptables support for offload
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
@@ -3,240 +3,8 @@
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/netfilter.h>
|
||||||
|
#include <linux/rhashtable.h>
|
||||||
|
-#include <linux/ipv6.h>
|
||||||
|
-#include <linux/netdevice.h>
|
||||||
|
-#include <net/ipv6.h>
|
||||||
|
-#include <net/ip6_route.h>
|
||||||
|
-#include <net/neighbour.h>
|
||||||
|
#include <net/netfilter/nf_flow_table.h>
|
||||||
|
#include <net/netfilter/nf_tables.h>
|
||||||
|
-/* For layer 4 checksum field offset. */
|
||||||
|
-#include <linux/tcp.h>
|
||||||
|
-#include <linux/udp.h>
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
- struct in6_addr *addr,
|
||||||
|
- struct in6_addr *new_addr)
|
||||||
|
-{
|
||||||
|
- struct tcphdr *tcph;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
|
||||||
|
- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
- inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
|
||||||
|
- new_addr->s6_addr32, true);
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
- struct in6_addr *addr,
|
||||||
|
- struct in6_addr *new_addr)
|
||||||
|
-{
|
||||||
|
- struct udphdr *udph;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
|
||||||
|
- skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- udph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
- if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||||
|
- inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
|
||||||
|
- new_addr->s6_addr32, true);
|
||||||
|
- if (!udph->check)
|
||||||
|
- udph->check = CSUM_MANGLED_0;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||||
|
- unsigned int thoff, struct in6_addr *addr,
|
||||||
|
- struct in6_addr *new_addr)
|
||||||
|
-{
|
||||||
|
- switch (ip6h->nexthdr) {
|
||||||
|
- case IPPROTO_TCP:
|
||||||
|
- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
- return NF_DROP;
|
||||||
|
- break;
|
||||||
|
- case IPPROTO_UDP:
|
||||||
|
- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
- return NF_DROP;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
|
||||||
|
- struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||||
|
- unsigned int thoff,
|
||||||
|
- enum flow_offload_tuple_dir dir)
|
||||||
|
-{
|
||||||
|
- struct in6_addr addr, new_addr;
|
||||||
|
-
|
||||||
|
- switch (dir) {
|
||||||
|
- case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
- addr = ip6h->saddr;
|
||||||
|
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
|
||||||
|
- ip6h->saddr = new_addr;
|
||||||
|
- break;
|
||||||
|
- case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
- addr = ip6h->daddr;
|
||||||
|
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
|
||||||
|
- ip6h->daddr = new_addr;
|
||||||
|
- break;
|
||||||
|
- default:
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
|
||||||
|
- struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||||
|
- unsigned int thoff,
|
||||||
|
- enum flow_offload_tuple_dir dir)
|
||||||
|
-{
|
||||||
|
- struct in6_addr addr, new_addr;
|
||||||
|
-
|
||||||
|
- switch (dir) {
|
||||||
|
- case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
- addr = ip6h->daddr;
|
||||||
|
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
|
||||||
|
- ip6h->daddr = new_addr;
|
||||||
|
- break;
|
||||||
|
- case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
- addr = ip6h->saddr;
|
||||||
|
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
|
||||||
|
- ip6h->saddr = new_addr;
|
||||||
|
- break;
|
||||||
|
- default:
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
|
||||||
|
- struct sk_buff *skb,
|
||||||
|
- enum flow_offload_tuple_dir dir)
|
||||||
|
-{
|
||||||
|
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
|
||||||
|
- unsigned int thoff = sizeof(*ip6h);
|
||||||
|
-
|
||||||
|
- if (flow->flags & FLOW_OFFLOAD_SNAT &&
|
||||||
|
- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||||
|
- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||||
|
- return -1;
|
||||||
|
- if (flow->flags & FLOW_OFFLOAD_DNAT &&
|
||||||
|
- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||||
|
- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
|
||||||
|
- struct flow_offload_tuple *tuple)
|
||||||
|
-{
|
||||||
|
- struct flow_ports *ports;
|
||||||
|
- struct ipv6hdr *ip6h;
|
||||||
|
- unsigned int thoff;
|
||||||
|
-
|
||||||
|
- if (!pskb_may_pull(skb, sizeof(*ip6h)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- ip6h = ipv6_hdr(skb);
|
||||||
|
-
|
||||||
|
- if (ip6h->nexthdr != IPPROTO_TCP &&
|
||||||
|
- ip6h->nexthdr != IPPROTO_UDP)
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- thoff = sizeof(*ip6h);
|
||||||
|
- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
|
||||||
|
- return -1;
|
||||||
|
-
|
||||||
|
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||||
|
-
|
||||||
|
- tuple->src_v6 = ip6h->saddr;
|
||||||
|
- tuple->dst_v6 = ip6h->daddr;
|
||||||
|
- tuple->src_port = ports->source;
|
||||||
|
- tuple->dst_port = ports->dest;
|
||||||
|
- tuple->l3proto = AF_INET6;
|
||||||
|
- tuple->l4proto = ip6h->nexthdr;
|
||||||
|
- tuple->iifidx = dev->ifindex;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-/* Based on ip_exceeds_mtu(). */
|
||||||
|
-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
|
||||||
|
-{
|
||||||
|
- if (skb->len <= mtu)
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
- return true;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-unsigned int
|
||||||
|
-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||||
|
- const struct nf_hook_state *state)
|
||||||
|
-{
|
||||||
|
- struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
- struct nf_flowtable *flow_table = priv;
|
||||||
|
- struct flow_offload_tuple tuple = {};
|
||||||
|
- enum flow_offload_tuple_dir dir;
|
||||||
|
- struct flow_offload *flow;
|
||||||
|
- struct net_device *outdev;
|
||||||
|
- struct in6_addr *nexthop;
|
||||||
|
- struct ipv6hdr *ip6h;
|
||||||
|
- struct rt6_info *rt;
|
||||||
|
-
|
||||||
|
- if (skb->protocol != htons(ETH_P_IPV6))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||||
|
- if (tuplehash == NULL)
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
|
||||||
|
- if (!outdev)
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- dir = tuplehash->tuple.dir;
|
||||||
|
- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
-
|
||||||
|
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||||
|
- return NF_ACCEPT;
|
||||||
|
-
|
||||||
|
- if (skb_try_make_writable(skb, sizeof(*ip6h)))
|
||||||
|
- return NF_DROP;
|
||||||
|
-
|
||||||
|
- if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
|
||||||
|
- nf_flow_nat_ipv6(flow, skb, dir) < 0)
|
||||||
|
- return NF_DROP;
|
||||||
|
-
|
||||||
|
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
|
||||||
|
- ip6h = ipv6_hdr(skb);
|
||||||
|
- ip6h->hop_limit--;
|
||||||
|
-
|
||||||
|
- skb->dev = outdev;
|
||||||
|
- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||||
|
- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
|
||||||
|
-
|
||||||
|
- return NF_STOLEN;
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
|
||||||
|
|
||||||
|
static struct nf_flowtable_type flowtable_ipv6 = {
|
||||||
|
.family = NFPROTO_IPV6,
|
||||||
|
--- a/net/netfilter/nf_flow_table_ip.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||||
|
@@ -4,8 +4,11 @@
|
||||||
|
#include <linux/netfilter.h>
|
||||||
|
#include <linux/rhashtable.h>
|
||||||
|
#include <linux/ip.h>
|
||||||
|
+#include <linux/ipv6.h>
|
||||||
|
#include <linux/netdevice.h>
|
||||||
|
#include <net/ip.h>
|
||||||
|
+#include <net/ipv6.h>
|
||||||
|
+#include <net/ip6_route.h>
|
||||||
|
#include <net/neighbour.h>
|
||||||
|
#include <net/netfilter/nf_flow_table.h>
|
||||||
|
/* For layer 4 checksum field offset. */
|
||||||
|
@@ -241,3 +244,215 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||||
|
return NF_STOLEN;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ struct in6_addr *addr,
|
||||||
|
+ struct in6_addr *new_addr)
|
||||||
|
+{
|
||||||
|
+ struct tcphdr *tcph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
|
||||||
|
+ new_addr->s6_addr32, true);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
+ struct in6_addr *addr,
|
||||||
|
+ struct in6_addr *new_addr)
|
||||||
|
+{
|
||||||
|
+ struct udphdr *udph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
|
||||||
|
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ udph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||||
|
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
|
||||||
|
+ new_addr->s6_addr32, true);
|
||||||
|
+ if (!udph->check)
|
||||||
|
+ udph->check = CSUM_MANGLED_0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||||
|
+ unsigned int thoff, struct in6_addr *addr,
|
||||||
|
+ struct in6_addr *new_addr)
|
||||||
|
+{
|
||||||
|
+ switch (ip6h->nexthdr) {
|
||||||
|
+ case IPPROTO_TCP:
|
||||||
|
+ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ case IPPROTO_UDP:
|
||||||
|
+ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||||
|
+ unsigned int thoff,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct in6_addr addr, new_addr;
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ addr = ip6h->saddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
|
||||||
|
+ ip6h->saddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ addr = ip6h->daddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
|
||||||
|
+ ip6h->daddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
|
||||||
|
+ unsigned int thoff,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct in6_addr addr, new_addr;
|
||||||
|
+
|
||||||
|
+ switch (dir) {
|
||||||
|
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||||
|
+ addr = ip6h->daddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
|
||||||
|
+ ip6h->daddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ case FLOW_OFFLOAD_DIR_REPLY:
|
||||||
|
+ addr = ip6h->saddr;
|
||||||
|
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
|
||||||
|
+ ip6h->saddr = new_addr;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ enum flow_offload_tuple_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
|
||||||
|
+ unsigned int thoff = sizeof(*ip6h);
|
||||||
|
+
|
||||||
|
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
|
||||||
|
+ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||||
|
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||||
|
+ return -1;
|
||||||
|
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
|
||||||
|
+ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
|
||||||
|
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
|
||||||
|
+ struct flow_offload_tuple *tuple)
|
||||||
|
+{
|
||||||
|
+ struct flow_ports *ports;
|
||||||
|
+ struct ipv6hdr *ip6h;
|
||||||
|
+ unsigned int thoff;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ ip6h = ipv6_hdr(skb);
|
||||||
|
+
|
||||||
|
+ if (ip6h->nexthdr != IPPROTO_TCP &&
|
||||||
|
+ ip6h->nexthdr != IPPROTO_UDP)
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ thoff = sizeof(*ip6h);
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
|
||||||
|
+
|
||||||
|
+ tuple->src_v6 = ip6h->saddr;
|
||||||
|
+ tuple->dst_v6 = ip6h->daddr;
|
||||||
|
+ tuple->src_port = ports->source;
|
||||||
|
+ tuple->dst_port = ports->dest;
|
||||||
|
+ tuple->l3proto = AF_INET6;
|
||||||
|
+ tuple->l4proto = ip6h->nexthdr;
|
||||||
|
+ tuple->iifidx = dev->ifindex;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned int
|
||||||
|
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||||
|
+ const struct nf_hook_state *state)
|
||||||
|
+{
|
||||||
|
+ struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
+ struct nf_flowtable *flow_table = priv;
|
||||||
|
+ struct flow_offload_tuple tuple = {};
|
||||||
|
+ enum flow_offload_tuple_dir dir;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ struct net_device *outdev;
|
||||||
|
+ struct in6_addr *nexthop;
|
||||||
|
+ struct ipv6hdr *ip6h;
|
||||||
|
+ struct rt6_info *rt;
|
||||||
|
+
|
||||||
|
+ if (skb->protocol != htons(ETH_P_IPV6))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
|
||||||
|
+ if (tuplehash == NULL)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
|
||||||
|
+ if (!outdev)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ dir = tuplehash->tuple.dir;
|
||||||
|
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
+ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
+
|
||||||
|
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (skb_try_make_writable(skb, sizeof(*ip6h)))
|
||||||
|
+ return NF_DROP;
|
||||||
|
+
|
||||||
|
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
|
||||||
|
+ nf_flow_nat_ipv6(flow, skb, dir) < 0)
|
||||||
|
+ return NF_DROP;
|
||||||
|
+
|
||||||
|
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
|
||||||
|
+ ip6h = ipv6_hdr(skb);
|
||||||
|
+ ip6h->hop_limit--;
|
||||||
|
+
|
||||||
|
+ skb->dev = outdev;
|
||||||
|
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||||
|
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
|
||||||
|
+
|
||||||
|
+ return NF_STOLEN;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
|
|
@ -0,0 +1,23 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sat, 17 Feb 2018 12:02:28 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: relax mixed ipv4/ipv6 flowtable
|
||||||
|
dependencies
|
||||||
|
|
||||||
|
Since the offload hook code was moved, this table no longer depends on
|
||||||
|
the IPv4 and IPv6 flowtable modules
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -676,8 +676,7 @@ endif # NF_TABLES
|
||||||
|
|
||||||
|
config NF_FLOW_TABLE_INET
|
||||||
|
tristate "Netfilter flow table mixed IPv4/IPv6 module"
|
||||||
|
- depends on NF_FLOW_TABLE_IPV4
|
||||||
|
- depends on NF_FLOW_TABLE_IPV6
|
||||||
|
+ depends on NF_FLOW_TABLE
|
||||||
|
help
|
||||||
|
This option adds the flow table mixed IPv4/IPv6 support.
|
||||||
|
|
|
@ -0,0 +1,298 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sun, 18 Feb 2018 18:16:31 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: move init code to
|
||||||
|
nf_flow_table_core.c
|
||||||
|
|
||||||
|
Reduces duplication of .gc and .params in flowtable type definitions and
|
||||||
|
makes the API clearer
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -14,9 +14,8 @@ struct nf_flowtable;
|
||||||
|
struct nf_flowtable_type {
|
||||||
|
struct list_head list;
|
||||||
|
int family;
|
||||||
|
- void (*gc)(struct work_struct *work);
|
||||||
|
+ int (*init)(struct nf_flowtable *ft);
|
||||||
|
void (*free)(struct nf_flowtable *ft);
|
||||||
|
- const struct rhashtable_params *params;
|
||||||
|
nf_hookfn *hook;
|
||||||
|
struct module *owner;
|
||||||
|
};
|
||||||
|
@@ -100,9 +99,8 @@ int nf_flow_table_iterate(struct nf_flow
|
||||||
|
|
||||||
|
void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
|
||||||
|
|
||||||
|
+int nf_flow_table_init(struct nf_flowtable *flow_table);
|
||||||
|
void nf_flow_table_free(struct nf_flowtable *flow_table);
|
||||||
|
-void nf_flow_offload_work_gc(struct work_struct *work);
|
||||||
|
-extern const struct rhashtable_params nf_flow_offload_rhash_params;
|
||||||
|
|
||||||
|
void flow_offload_dead(struct flow_offload *flow);
|
||||||
|
|
||||||
|
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
|
||||||
|
@@ -7,8 +7,7 @@
|
||||||
|
|
||||||
|
static struct nf_flowtable_type flowtable_ipv4 = {
|
||||||
|
.family = NFPROTO_IPV4,
|
||||||
|
- .params = &nf_flow_offload_rhash_params,
|
||||||
|
- .gc = nf_flow_offload_work_gc,
|
||||||
|
+ .init = nf_flow_table_init,
|
||||||
|
.free = nf_flow_table_free,
|
||||||
|
.hook = nf_flow_offload_ip_hook,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
|
||||||
|
@@ -8,8 +8,7 @@
|
||||||
|
|
||||||
|
static struct nf_flowtable_type flowtable_ipv6 = {
|
||||||
|
.family = NFPROTO_IPV6,
|
||||||
|
- .params = &nf_flow_offload_rhash_params,
|
||||||
|
- .gc = nf_flow_offload_work_gc,
|
||||||
|
+ .init = nf_flow_table_init,
|
||||||
|
.free = nf_flow_table_free,
|
||||||
|
.hook = nf_flow_offload_ipv6_hook,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
--- a/net/netfilter/nf_flow_table_core.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_core.c
|
||||||
|
@@ -116,16 +116,50 @@ void flow_offload_dead(struct flow_offlo
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_dead);
|
||||||
|
|
||||||
|
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
|
||||||
|
+{
|
||||||
|
+ const struct flow_offload_tuple *tuple = data;
|
||||||
|
+
|
||||||
|
+ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
|
||||||
|
+{
|
||||||
|
+ const struct flow_offload_tuple_rhash *tuplehash = data;
|
||||||
|
+
|
||||||
|
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
|
||||||
|
+ const void *ptr)
|
||||||
|
+{
|
||||||
|
+ const struct flow_offload_tuple *tuple = arg->key;
|
||||||
|
+ const struct flow_offload_tuple_rhash *x = ptr;
|
||||||
|
+
|
||||||
|
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static const struct rhashtable_params nf_flow_offload_rhash_params = {
|
||||||
|
+ .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
|
||||||
|
+ .hashfn = flow_offload_hash,
|
||||||
|
+ .obj_hashfn = flow_offload_hash_obj,
|
||||||
|
+ .obj_cmpfn = flow_offload_hash_cmp,
|
||||||
|
+ .automatic_shrinking = true,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
|
||||||
|
{
|
||||||
|
flow->timeout = (u32)jiffies;
|
||||||
|
|
||||||
|
rhashtable_insert_fast(&flow_table->rhashtable,
|
||||||
|
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
+ nf_flow_offload_rhash_params);
|
||||||
|
rhashtable_insert_fast(&flow_table->rhashtable,
|
||||||
|
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
+ nf_flow_offload_rhash_params);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_add);
|
||||||
|
@@ -135,10 +169,10 @@ static void flow_offload_del(struct nf_f
|
||||||
|
{
|
||||||
|
rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
+ nf_flow_offload_rhash_params);
|
||||||
|
rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
+ nf_flow_offload_rhash_params);
|
||||||
|
|
||||||
|
flow_offload_free(flow);
|
||||||
|
}
|
||||||
|
@@ -148,7 +182,7 @@ flow_offload_lookup(struct nf_flowtable
|
||||||
|
struct flow_offload_tuple *tuple)
|
||||||
|
{
|
||||||
|
return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
|
||||||
|
- *flow_table->type->params);
|
||||||
|
+ nf_flow_offload_rhash_params);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_lookup);
|
||||||
|
|
||||||
|
@@ -237,7 +271,7 @@ out:
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
-void nf_flow_offload_work_gc(struct work_struct *work)
|
||||||
|
+static void nf_flow_offload_work_gc(struct work_struct *work)
|
||||||
|
{
|
||||||
|
struct nf_flowtable *flow_table;
|
||||||
|
|
||||||
|
@@ -245,42 +279,6 @@ void nf_flow_offload_work_gc(struct work
|
||||||
|
nf_flow_offload_gc_step(flow_table);
|
||||||
|
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
|
||||||
|
}
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
|
||||||
|
-
|
||||||
|
-static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
|
||||||
|
-{
|
||||||
|
- const struct flow_offload_tuple *tuple = data;
|
||||||
|
-
|
||||||
|
- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
|
||||||
|
-{
|
||||||
|
- const struct flow_offload_tuple_rhash *tuplehash = data;
|
||||||
|
-
|
||||||
|
- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
|
||||||
|
- const void *ptr)
|
||||||
|
-{
|
||||||
|
- const struct flow_offload_tuple *tuple = arg->key;
|
||||||
|
- const struct flow_offload_tuple_rhash *x = ptr;
|
||||||
|
-
|
||||||
|
- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
|
||||||
|
- return 1;
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-const struct rhashtable_params nf_flow_offload_rhash_params = {
|
||||||
|
- .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
|
||||||
|
- .hashfn = flow_offload_hash,
|
||||||
|
- .obj_hashfn = flow_offload_hash_obj,
|
||||||
|
- .obj_cmpfn = flow_offload_hash_cmp,
|
||||||
|
- .automatic_shrinking = true,
|
||||||
|
-};
|
||||||
|
-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
|
||||||
|
|
||||||
|
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
__be16 port, __be16 new_port)
|
||||||
|
@@ -398,6 +396,24 @@ int nf_flow_dnat_port(const struct flow_
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
|
||||||
|
|
||||||
|
+int nf_flow_table_init(struct nf_flowtable *flowtable)
|
||||||
|
+{
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
|
||||||
|
+
|
||||||
|
+ err = rhashtable_init(&flowtable->rhashtable,
|
||||||
|
+ &nf_flow_offload_rhash_params);
|
||||||
|
+ if (err < 0)
|
||||||
|
+ return err;
|
||||||
|
+
|
||||||
|
+ queue_delayed_work(system_power_efficient_wq,
|
||||||
|
+ &flowtable->gc_work, HZ);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(nf_flow_table_init);
|
||||||
|
+
|
||||||
|
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
|
||||||
|
{
|
||||||
|
struct net_device *dev = data;
|
||||||
|
@@ -423,8 +439,10 @@ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup)
|
||||||
|
|
||||||
|
void nf_flow_table_free(struct nf_flowtable *flow_table)
|
||||||
|
{
|
||||||
|
+ cancel_delayed_work_sync(&flow_table->gc_work);
|
||||||
|
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
|
||||||
|
WARN_ON(!nf_flow_offload_gc_step(flow_table));
|
||||||
|
+ rhashtable_destroy(&flow_table->rhashtable);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_flow_table_free);
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table_inet.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_inet.c
|
||||||
|
@@ -22,8 +22,7 @@ nf_flow_offload_inet_hook(void *priv, st
|
||||||
|
|
||||||
|
static struct nf_flowtable_type flowtable_inet = {
|
||||||
|
.family = NFPROTO_INET,
|
||||||
|
- .params = &nf_flow_offload_rhash_params,
|
||||||
|
- .gc = nf_flow_offload_work_gc,
|
||||||
|
+ .init = nf_flow_table_init,
|
||||||
|
.free = nf_flow_table_free,
|
||||||
|
.hook = nf_flow_offload_inet_hook,
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -5081,40 +5081,38 @@ static int nf_tables_newflowtable(struct
|
||||||
|
}
|
||||||
|
|
||||||
|
flowtable->data.type = type;
|
||||||
|
- err = rhashtable_init(&flowtable->data.rhashtable, type->params);
|
||||||
|
+ err = type->init(&flowtable->data);
|
||||||
|
if (err < 0)
|
||||||
|
goto err3;
|
||||||
|
|
||||||
|
err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
|
||||||
|
flowtable);
|
||||||
|
if (err < 0)
|
||||||
|
- goto err3;
|
||||||
|
+ goto err4;
|
||||||
|
|
||||||
|
for (i = 0; i < flowtable->ops_len; i++) {
|
||||||
|
err = nf_register_net_hook(net, &flowtable->ops[i]);
|
||||||
|
if (err < 0)
|
||||||
|
- goto err4;
|
||||||
|
+ goto err5;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
|
||||||
|
if (err < 0)
|
||||||
|
- goto err5;
|
||||||
|
-
|
||||||
|
- INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
|
||||||
|
- queue_delayed_work(system_power_efficient_wq,
|
||||||
|
- &flowtable->data.gc_work, HZ);
|
||||||
|
+ goto err6;
|
||||||
|
|
||||||
|
list_add_tail_rcu(&flowtable->list, &table->flowtables);
|
||||||
|
table->use++;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
-err5:
|
||||||
|
+err6:
|
||||||
|
i = flowtable->ops_len;
|
||||||
|
-err4:
|
||||||
|
+err5:
|
||||||
|
for (k = i - 1; k >= 0; k--)
|
||||||
|
nf_unregister_net_hook(net, &flowtable->ops[i]);
|
||||||
|
|
||||||
|
kfree(flowtable->ops);
|
||||||
|
+err4:
|
||||||
|
+ flowtable->data.type->free(&flowtable->data);
|
||||||
|
err3:
|
||||||
|
module_put(type->owner);
|
||||||
|
err2:
|
||||||
|
@@ -5395,10 +5393,8 @@ err:
|
||||||
|
|
||||||
|
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
|
||||||
|
{
|
||||||
|
- cancel_delayed_work_sync(&flowtable->data.gc_work);
|
||||||
|
kfree(flowtable->name);
|
||||||
|
flowtable->data.type->free(&flowtable->data);
|
||||||
|
- rhashtable_destroy(&flowtable->data.rhashtable);
|
||||||
|
module_put(flowtable->data.type->owner);
|
||||||
|
}
|
||||||
|
|
|
@@ -0,0 +1,22 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Tue, 20 Feb 2018 14:48:51 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: fix priv pointer for netdev hook
|
||||||
|
|
||||||
|
The offload ip hook expects a pointer to the flowtable, not to the
|
||||||
|
rhashtable. Since the rhashtable is the first member, this is safe for
|
||||||
|
the moment, but breaks as soon as the structure layout changes
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -4947,7 +4947,7 @@ static int nf_tables_flowtable_parse_hoo
|
||||||
|
flowtable->ops[i].pf = NFPROTO_NETDEV;
|
||||||
|
flowtable->ops[i].hooknum = hooknum;
|
||||||
|
flowtable->ops[i].priority = priority;
|
||||||
|
- flowtable->ops[i].priv = &flowtable->data.rhashtable;
|
||||||
|
+ flowtable->ops[i].priv = &flowtable->data;
|
||||||
|
flowtable->ops[i].hook = flowtable->data.type->hook;
|
||||||
|
flowtable->ops[i].dev = dev_array[i];
|
||||||
|
}
|
|
@@ -0,0 +1,114 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Tue, 20 Feb 2018 14:08:14 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: track flow tables in nf_flow_table
|
||||||
|
directly
|
||||||
|
|
||||||
|
Avoids having nf_flow_table depend on nftables (useful for future
|
||||||
|
iptables backport work)
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -21,6 +21,7 @@ struct nf_flowtable_type {
|
||||||
|
};
|
||||||
|
|
||||||
|
struct nf_flowtable {
|
||||||
|
+ struct list_head list;
|
||||||
|
struct rhashtable rhashtable;
|
||||||
|
const struct nf_flowtable_type *type;
|
||||||
|
struct delayed_work gc_work;
|
||||||
|
--- a/include/net/netfilter/nf_tables.h
|
||||||
|
+++ b/include/net/netfilter/nf_tables.h
|
||||||
|
@@ -1096,9 +1096,6 @@ struct nft_flowtable {
|
||||||
|
struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
|
||||||
|
const struct nlattr *nla,
|
||||||
|
u8 genmask);
|
||||||
|
-void nft_flow_table_iterate(struct net *net,
|
||||||
|
- void (*iter)(struct nf_flowtable *flowtable, void *data),
|
||||||
|
- void *data);
|
||||||
|
|
||||||
|
void nft_register_flowtable_type(struct nf_flowtable_type *type);
|
||||||
|
void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
|
||||||
|
--- a/net/netfilter/nf_flow_table_core.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_core.c
|
||||||
|
@@ -18,6 +18,9 @@ struct flow_offload_entry {
|
||||||
|
struct rcu_head rcu_head;
|
||||||
|
};
|
||||||
|
|
||||||
|
+static DEFINE_MUTEX(flowtable_lock);
|
||||||
|
+static LIST_HEAD(flowtables);
|
||||||
|
+
|
||||||
|
static void
|
||||||
|
flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
|
||||||
|
struct nf_flow_route *route,
|
||||||
|
@@ -410,6 +413,10 @@ int nf_flow_table_init(struct nf_flowtab
|
||||||
|
queue_delayed_work(system_power_efficient_wq,
|
||||||
|
&flowtable->gc_work, HZ);
|
||||||
|
|
||||||
|
+ mutex_lock(&flowtable_lock);
|
||||||
|
+ list_add(&flowtable->list, &flowtables);
|
||||||
|
+ mutex_unlock(&flowtable_lock);
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_flow_table_init);
|
||||||
|
@@ -425,20 +432,28 @@ static void nf_flow_table_do_cleanup(str
|
||||||
|
}
|
||||||
|
|
||||||
|
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
|
||||||
|
- void *data)
|
||||||
|
+ struct net_device *dev)
|
||||||
|
{
|
||||||
|
- nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
|
||||||
|
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
|
||||||
|
flush_delayed_work(&flowtable->gc_work);
|
||||||
|
}
|
||||||
|
|
||||||
|
void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
|
||||||
|
{
|
||||||
|
- nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
|
||||||
|
+ struct nf_flowtable *flowtable;
|
||||||
|
+
|
||||||
|
+ mutex_lock(&flowtable_lock);
|
||||||
|
+ list_for_each_entry(flowtable, &flowtables, list)
|
||||||
|
+ nf_flow_table_iterate_cleanup(flowtable, dev);
|
||||||
|
+ mutex_unlock(&flowtable_lock);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
|
||||||
|
|
||||||
|
void nf_flow_table_free(struct nf_flowtable *flow_table)
|
||||||
|
{
|
||||||
|
+ mutex_lock(&flowtable_lock);
|
||||||
|
+ list_del(&flow_table->list);
|
||||||
|
+ mutex_unlock(&flowtable_lock);
|
||||||
|
cancel_delayed_work_sync(&flow_table->gc_work);
|
||||||
|
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
|
||||||
|
WARN_ON(!nf_flow_offload_gc_step(flow_table));
|
||||||
|
--- a/net/netfilter/nf_tables_api.c
|
||||||
|
+++ b/net/netfilter/nf_tables_api.c
|
||||||
|
@@ -4991,23 +4991,6 @@ static const struct nf_flowtable_type *n
|
||||||
|
return ERR_PTR(-ENOENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
-void nft_flow_table_iterate(struct net *net,
|
||||||
|
- void (*iter)(struct nf_flowtable *flowtable, void *data),
|
||||||
|
- void *data)
|
||||||
|
-{
|
||||||
|
- struct nft_flowtable *flowtable;
|
||||||
|
- const struct nft_table *table;
|
||||||
|
-
|
||||||
|
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
- list_for_each_entry(table, &net->nft.tables, list) {
|
||||||
|
- list_for_each_entry(flowtable, &table->flowtables, list) {
|
||||||
|
- iter(&flowtable->data, data);
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
|
||||||
|
-
|
||||||
|
static void nft_unregister_flowtable_net_hooks(struct net *net,
|
||||||
|
struct nft_flowtable *flowtable)
|
||||||
|
{
|
|
@@ -0,0 +1,38 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sun, 25 Feb 2018 15:37:27 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: make flow_offload_dead inline
|
||||||
|
|
||||||
|
It is too trivial to keep as a separate exported function
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -103,7 +103,10 @@ void nf_flow_table_cleanup(struct net *n
|
||||||
|
int nf_flow_table_init(struct nf_flowtable *flow_table);
|
||||||
|
void nf_flow_table_free(struct nf_flowtable *flow_table);
|
||||||
|
|
||||||
|
-void flow_offload_dead(struct flow_offload *flow);
|
||||||
|
+static inline void flow_offload_dead(struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ flow->flags |= FLOW_OFFLOAD_DYING;
|
||||||
|
+}
|
||||||
|
|
||||||
|
int nf_flow_snat_port(const struct flow_offload *flow,
|
||||||
|
struct sk_buff *skb, unsigned int thoff,
|
||||||
|
--- a/net/netfilter/nf_flow_table_core.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_core.c
|
||||||
|
@@ -113,12 +113,6 @@ void flow_offload_free(struct flow_offlo
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_free);
|
||||||
|
|
||||||
|
-void flow_offload_dead(struct flow_offload *flow)
|
||||||
|
-{
|
||||||
|
- flow->flags |= FLOW_OFFLOAD_DYING;
|
||||||
|
-}
|
||||||
|
-EXPORT_SYMBOL_GPL(flow_offload_dead);
|
||||||
|
-
|
||||||
|
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
|
||||||
|
{
|
||||||
|
const struct flow_offload_tuple *tuple = data;
|
|
@@ -0,0 +1,83 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sun, 25 Feb 2018 15:38:31 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: add a new flow state for
|
||||||
|
tearing down offloading
|
||||||
|
|
||||||
|
Will be used to tear down the offload entry while keeping the conntrack
|
||||||
|
entry alive.
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_flow_table.h
|
||||||
|
+++ b/include/net/netfilter/nf_flow_table.h
|
||||||
|
@@ -68,6 +68,7 @@ struct flow_offload_tuple_rhash {
|
||||||
|
#define FLOW_OFFLOAD_SNAT 0x1
|
||||||
|
#define FLOW_OFFLOAD_DNAT 0x2
|
||||||
|
#define FLOW_OFFLOAD_DYING 0x4
|
||||||
|
+#define FLOW_OFFLOAD_TEARDOWN 0x8
|
||||||
|
|
||||||
|
struct flow_offload {
|
||||||
|
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
|
||||||
|
@@ -103,6 +104,7 @@ void nf_flow_table_cleanup(struct net *n
|
||||||
|
int nf_flow_table_init(struct nf_flowtable *flow_table);
|
||||||
|
void nf_flow_table_free(struct nf_flowtable *flow_table);
|
||||||
|
|
||||||
|
+void flow_offload_teardown(struct flow_offload *flow);
|
||||||
|
static inline void flow_offload_dead(struct flow_offload *flow)
|
||||||
|
{
|
||||||
|
flow->flags |= FLOW_OFFLOAD_DYING;
|
||||||
|
--- a/net/netfilter/nf_flow_table_core.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_core.c
|
||||||
|
@@ -174,6 +174,12 @@ static void flow_offload_del(struct nf_f
|
||||||
|
flow_offload_free(flow);
|
||||||
|
}
|
||||||
|
|
||||||
|
+void flow_offload_teardown(struct flow_offload *flow)
|
||||||
|
+{
|
||||||
|
+ flow->flags |= FLOW_OFFLOAD_TEARDOWN;
|
||||||
|
+}
|
||||||
|
+EXPORT_SYMBOL_GPL(flow_offload_teardown);
|
||||||
|
+
|
||||||
|
struct flow_offload_tuple_rhash *
|
||||||
|
flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||||
|
struct flow_offload_tuple *tuple)
|
||||||
|
@@ -226,11 +232,6 @@ static inline bool nf_flow_has_expired(c
|
||||||
|
return (__s32)(flow->timeout - (u32)jiffies) <= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
|
||||||
|
-{
|
||||||
|
- return flow->flags & FLOW_OFFLOAD_DYING;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
|
||||||
|
{
|
||||||
|
struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
@@ -258,7 +259,8 @@ static int nf_flow_offload_gc_step(struc
|
||||||
|
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
|
||||||
|
|
||||||
|
if (nf_flow_has_expired(flow) ||
|
||||||
|
- nf_flow_is_dying(flow))
|
||||||
|
+ (flow->flags & (FLOW_OFFLOAD_DYING |
|
||||||
|
+ FLOW_OFFLOAD_TEARDOWN)))
|
||||||
|
flow_offload_del(flow_table, flow);
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
@@ -419,10 +421,14 @@ static void nf_flow_table_do_cleanup(str
|
||||||
|
{
|
||||||
|
struct net_device *dev = data;
|
||||||
|
|
||||||
|
- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
|
||||||
|
+ if (!dev) {
|
||||||
|
+ flow_offload_teardown(flow);
|
||||||
|
return;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- flow_offload_dead(flow);
|
||||||
|
+ if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
|
||||||
|
+ flow->tuplehash[1].tuple.iifidx == dev->ifindex)
|
||||||
|
+ flow_offload_dead(flow);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
|
|
@@ -0,0 +1,36 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sun, 25 Feb 2018 15:39:56 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: in flow_offload_lookup, skip
|
||||||
|
entries being deleted
|
||||||
|
|
||||||
|
Preparation for sending flows back to the slow path
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table_core.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_core.c
|
||||||
|
@@ -184,8 +184,21 @@ struct flow_offload_tuple_rhash *
|
||||||
|
flow_offload_lookup(struct nf_flowtable *flow_table,
|
||||||
|
struct flow_offload_tuple *tuple)
|
||||||
|
{
|
||||||
|
- return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
|
||||||
|
- nf_flow_offload_rhash_params);
|
||||||
|
+ struct flow_offload_tuple_rhash *tuplehash;
|
||||||
|
+ struct flow_offload *flow;
|
||||||
|
+ int dir;
|
||||||
|
+
|
||||||
|
+ tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
|
||||||
|
+ nf_flow_offload_rhash_params);
|
||||||
|
+ if (!tuplehash)
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ dir = tuplehash->tuple.dir;
|
||||||
|
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
+ if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ return tuplehash;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_lookup);
|
||||||
|
|
|
@@ -0,0 +1,99 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sun, 25 Feb 2018 15:41:11 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: add support for sending flows
|
||||||
|
back to the slow path
|
||||||
|
|
||||||
|
Reset the timeout. For TCP, also set the state so that the next incoming
|
||||||
|
packets are used to reset window tracking.
|
||||||
|
This allows the slow path to take over again once the offload state has
|
||||||
|
been torn down
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table_core.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_core.c
|
||||||
|
@@ -100,6 +100,43 @@ err_ct_refcnt:
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_alloc);
|
||||||
|
|
||||||
|
+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
|
||||||
|
+{
|
||||||
|
+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
|
||||||
|
+ tcp->seen[0].td_maxwin = 0;
|
||||||
|
+ tcp->seen[1].td_maxwin = 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void flow_offload_fixup_ct_state(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ const struct nf_conntrack_l4proto *l4proto;
|
||||||
|
+ struct net *net = nf_ct_net(ct);
|
||||||
|
+ unsigned int *timeouts;
|
||||||
|
+ unsigned int timeout;
|
||||||
|
+ int l4num;
|
||||||
|
+
|
||||||
|
+ l4num = nf_ct_protonum(ct);
|
||||||
|
+ if (l4num == IPPROTO_TCP)
|
||||||
|
+ flow_offload_fixup_tcp(&ct->proto.tcp);
|
||||||
|
+
|
||||||
|
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
|
||||||
|
+ if (!l4proto)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ timeouts = l4proto->get_timeouts(net);
|
||||||
|
+ if (!timeouts)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ if (l4num == IPPROTO_TCP)
|
||||||
|
+ timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
|
||||||
|
+ else if (l4num == IPPROTO_UDP)
|
||||||
|
+ timeout = timeouts[UDP_CT_REPLIED];
|
||||||
|
+ else
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ ct->timeout = nfct_time_stamp + timeout;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
void flow_offload_free(struct flow_offload *flow)
|
||||||
|
{
|
||||||
|
struct flow_offload_entry *e;
|
||||||
|
@@ -107,7 +144,8 @@ void flow_offload_free(struct flow_offlo
|
||||||
|
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
|
||||||
|
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
|
||||||
|
e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
- nf_ct_delete(e->ct, 0, 0);
|
||||||
|
+ if (flow->flags & FLOW_OFFLOAD_DYING)
|
||||||
|
+ nf_ct_delete(e->ct, 0, 0);
|
||||||
|
nf_ct_put(e->ct);
|
||||||
|
kfree_rcu(e, rcu_head);
|
||||||
|
}
|
||||||
|
@@ -164,6 +202,8 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
|
||||||
|
static void flow_offload_del(struct nf_flowtable *flow_table,
|
||||||
|
struct flow_offload *flow)
|
||||||
|
{
|
||||||
|
+ struct flow_offload_entry *e;
|
||||||
|
+
|
||||||
|
rhashtable_remove_fast(&flow_table->rhashtable,
|
||||||
|
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
|
||||||
|
nf_flow_offload_rhash_params);
|
||||||
|
@@ -171,12 +211,20 @@ static void flow_offload_del(struct nf_f
|
||||||
|
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
|
||||||
|
nf_flow_offload_rhash_params);
|
||||||
|
|
||||||
|
+ e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
+ clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
|
||||||
|
+
|
||||||
|
flow_offload_free(flow);
|
||||||
|
}
|
||||||
|
|
||||||
|
void flow_offload_teardown(struct flow_offload *flow)
|
||||||
|
{
|
||||||
|
+ struct flow_offload_entry *e;
|
||||||
|
+
|
||||||
|
flow->flags |= FLOW_OFFLOAD_TEARDOWN;
|
||||||
|
+
|
||||||
|
+ e = container_of(flow, struct flow_offload_entry, flow);
|
||||||
|
+ flow_offload_fixup_ct_state(e->ct);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(flow_offload_teardown);
|
||||||
|
|
|
@@ -0,0 +1,81 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sun, 25 Feb 2018 15:42:58 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: tear down TCP flows if RST or
|
||||||
|
FIN was seen
|
||||||
|
|
||||||
|
Allow the slow path to handle the shutdown of the connection with proper
|
||||||
|
timeouts
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table_ip.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||||
|
@@ -15,6 +15,23 @@
|
||||||
|
#include <linux/tcp.h>
|
||||||
|
#include <linux/udp.h>
|
||||||
|
|
||||||
|
+static int nf_flow_tcp_state_check(struct flow_offload *flow,
|
||||||
|
+ struct sk_buff *skb, unsigned int thoff)
|
||||||
|
+{
|
||||||
|
+ struct tcphdr *tcph;
|
||||||
|
+
|
||||||
|
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ tcph = (void *)(skb_network_header(skb) + thoff);
|
||||||
|
+ if (unlikely(tcph->fin || tcph->rst)) {
|
||||||
|
+ flow_offload_teardown(flow);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
|
||||||
|
__be32 addr, __be32 new_addr)
|
||||||
|
{
|
||||||
|
@@ -118,10 +135,9 @@ static int nf_flow_dnat_ip(const struct
|
||||||
|
}
|
||||||
|
|
||||||
|
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
|
||||||
|
- enum flow_offload_tuple_dir dir)
|
||||||
|
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
|
||||||
|
{
|
||||||
|
struct iphdr *iph = ip_hdr(skb);
|
||||||
|
- unsigned int thoff = iph->ihl * 4;
|
||||||
|
|
||||||
|
if (flow->flags & FLOW_OFFLOAD_SNAT &&
|
||||||
|
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
|
||||||
|
@@ -201,6 +217,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||||
|
struct flow_offload *flow;
|
||||||
|
struct net_device *outdev;
|
||||||
|
const struct rtable *rt;
|
||||||
|
+ unsigned int thoff;
|
||||||
|
struct iphdr *iph;
|
||||||
|
__be32 nexthop;
|
||||||
|
|
||||||
|
@@ -229,8 +246,12 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||||
|
if (skb_try_make_writable(skb, sizeof(*iph)))
|
||||||
|
return NF_DROP;
|
||||||
|
|
||||||
|
+ thoff = ip_hdr(skb)->ihl * 4;
|
||||||
|
+ if (nf_flow_tcp_state_check(flow, skb, thoff))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
|
||||||
|
- nf_flow_nat_ip(flow, skb, dir) < 0)
|
||||||
|
+ nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
|
||||||
|
return NF_DROP;
|
||||||
|
|
||||||
|
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
|
||||||
|
@@ -438,6 +459,9 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||||
|
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||||
|
return NF_ACCEPT;
|
||||||
|
|
||||||
|
+ if (nf_flow_tcp_state_check(flow, skb, sizeof(*ip6h)))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
if (skb_try_make_writable(skb, sizeof(*ip6h)))
|
||||||
|
return NF_DROP;
|
||||||
|
|
|
@@ -0,0 +1,19 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Sun, 25 Feb 2018 17:22:55 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: fix checksum when handling DNAT
|
||||||
|
|
||||||
|
Add a missing call to csum_replace4 like on SNAT
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table_ip.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||||
|
@@ -130,6 +130,7 @@ static int nf_flow_dnat_ip(const struct
|
||||||
|
default:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
+ csum_replace4(&iph->check, addr, new_addr);
|
||||||
|
|
||||||
|
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
|
||||||
|
}
|
|
@@ -0,0 +1,89 @@
|
||||||
|
From: Felix Fietkau <nbd@nbd.name>
|
||||||
|
Date: Thu, 15 Mar 2018 18:21:43 +0100
|
||||||
|
Subject: [PATCH] netfilter: nf_flow_table: clean up and fix dst handling
|
||||||
|
|
||||||
|
dst handling in the code is inconsistent and possibly wrong. In my test,
|
||||||
|
skb_dst(skb) holds the dst entry after routing but before NAT, so the
|
||||||
|
code could possibly return the same dst entry for both directions of a
|
||||||
|
connection.
|
||||||
|
Additionally, there was some confusion over the dst entry vs the address
|
||||||
|
passed as parameter to rt_nexthop/rt6_nexthop.
|
||||||
|
|
||||||
|
Do an explicit dst lookup for both ends of the connection and always use
|
||||||
|
the source address for it. When running the IP hook, use the dst entry
|
||||||
|
for the opposite direction for determining the route.
|
||||||
|
|
||||||
|
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||||
|
---
|
||||||
|
|
||||||
|
--- a/net/netfilter/nf_flow_table_ip.c
|
||||||
|
+++ b/net/netfilter/nf_flow_table_ip.c
|
||||||
|
@@ -238,7 +238,7 @@ nf_flow_offload_ip_hook(void *priv, stru
|
||||||
|
|
||||||
|
dir = tuplehash->tuple.dir;
|
||||||
|
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
+ rt = (const struct rtable *)flow->tuplehash[!dir].tuple.dst_cache;
|
||||||
|
|
||||||
|
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
|
||||||
|
(ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
|
||||||
|
@@ -455,7 +455,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
|
||||||
|
|
||||||
|
dir = tuplehash->tuple.dir;
|
||||||
|
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||||
|
- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
|
||||||
|
+ rt = (struct rt6_info *)flow->tuplehash[!dir].tuple.dst_cache;
|
||||||
|
|
||||||
|
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
|
||||||
|
return NF_ACCEPT;
|
||||||
|
--- a/net/netfilter/nft_flow_offload.c
|
||||||
|
+++ b/net/netfilter/nft_flow_offload.c
|
||||||
|
@@ -17,27 +17,38 @@ struct nft_flow_offload {
|
||||||
|
struct nft_flowtable *flowtable;
|
||||||
|
};
|
||||||
|
|
||||||
|
-static int nft_flow_route(const struct nft_pktinfo *pkt,
|
||||||
|
- const struct nf_conn *ct,
|
||||||
|
- struct nf_flow_route *route,
|
||||||
|
- enum ip_conntrack_dir dir)
|
||||||
|
+static struct dst_entry *
|
||||||
|
+nft_flow_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir,
|
||||||
|
+ const struct nft_pktinfo *pkt)
|
||||||
|
{
|
||||||
|
- struct dst_entry *this_dst = skb_dst(pkt->skb);
|
||||||
|
- struct dst_entry *other_dst = NULL;
|
||||||
|
+ struct dst_entry *dst = NULL;
|
||||||
|
struct flowi fl;
|
||||||
|
|
||||||
|
memset(&fl, 0, sizeof(fl));
|
||||||
|
switch (nft_pf(pkt)) {
|
||||||
|
case NFPROTO_IPV4:
|
||||||
|
- fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
|
||||||
|
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
|
||||||
|
break;
|
||||||
|
case NFPROTO_IPV6:
|
||||||
|
- fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
|
||||||
|
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
- nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
|
||||||
|
- if (!other_dst)
|
||||||
|
+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
|
||||||
|
+
|
||||||
|
+ return dst;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nft_flow_route(const struct nft_pktinfo *pkt,
|
||||||
|
+ const struct nf_conn *ct,
|
||||||
|
+ struct nf_flow_route *route,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct dst_entry *this_dst, *other_dst;
|
||||||
|
+
|
||||||
|
+ this_dst = nft_flow_dst(ct, dir, pkt);
|
||||||
|
+ other_dst = nft_flow_dst(ct, !dir, pkt);
|
||||||
|
+ if (!this_dst || !other_dst)
|
||||||
|
return -ENOENT;
|
||||||
|
|
||||||
|
route->tuple[dir].dst = this_dst;
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue