diffstat for iptables-1.6.0 iptables-1.6.0 changelog | 96 control | 11 iptables-dev.doc-base.netfilter-extensions | 9 iptables-dev.doc-base.netfilter-hacking | 9 iptables-dev.install | 4 iptables.doc-base.nat | 11 iptables.doc-base.packet-filter | 10 iptables.install | 3 libxtables11.install | 2 patches/9000-howtos.patch | 5760 ++++++++++++ patches/9002-libxt_recent-Add-support-for-reap-option.patch | 26 patches/series | 3 rules | 7 13 files changed, 5943 insertions(+), 8 deletions(-) diff -Nru iptables-1.6.0/debian/changelog iptables-1.6.0/debian/changelog --- iptables-1.6.0/debian/changelog 2016-01-27 18:07:55.000000000 +0000 +++ iptables-1.6.0/debian/changelog 2016-02-19 15:17:20.000000000 +0000 @@ -1,3 +1,35 @@ +iptables (1.6.0-2ubuntu3) xenial; urgency=medium + + * Don't ship an empty iptables-nftables-compat package. + + -- Matthias Klose Fri, 19 Feb 2016 15:13:01 +0000 + +iptables (1.6.0-2ubuntu2) xenial; urgency=medium + + * Multiarchify the libxtables11 package. + * Move other libraries to the multiarch libdir. + + -- Matthias Klose Mon, 15 Feb 2016 21:21:05 +0100 + +iptables (1.6.0-2ubuntu1) xenial; urgency=medium + + * Merge with Debian; remaining changes: + - debian/control: add linuxdoc-tools dep, remove libipq references + - debian/rules: compile with --disable-libipq + - 9000-howtos.patch: add howtos/ and install them + - 9002-libxt_recent-Add-support-for-reap-option.patch: Some changes are + upstream, patch needed for additional reap option checks. 
+ - debian/iptables.install: install NAT and packetfilter howtos into + /usr/share/doc + - debian/iptables-dev.doc-base.netfilter-extensions, + debian/iptables-dev.doc-base.netfilter-hacking, + debian/iptables.doc-base.nat, debian/iptables.doc-base.packet-filter: + add howtos + - debian/iptables-dev.install: remove usr/share/man/man3 only used with + libipq manpages + + -- Matthias Klose Mon, 15 Feb 2016 21:06:54 +0100 + iptables (1.6.0-2) unstable; urgency=medium * Rebuild for unstable @@ -30,6 +62,31 @@ -- Arturo Borrero Gonzalez Fri, 15 Jan 2016 13:23:10 +0100 +iptables (1.4.21-2ubuntu2) vivid; urgency=medium + + * No change rebuild to get debug symbols for all architectures. + + -- Brian Murray Wed, 03 Dec 2014 08:10:31 -0800 + +iptables (1.4.21-2ubuntu1) utopic; urgency=medium + + * Merge from Debian unstable. Remaining changes: + - debian/control: add linuxdoc-tools dep, remove libipq references + - debian/rules: compile with --disable-libipq + - 9000-howtos.patch: add howtos/ and install them + - 9002-libxt_recent-Add-support-for-reap-option.patch: Some changes are + upstream, patch needed for additional reap option checks. + - debian/iptables.install: install NAT and packetfilter howtos into + /usr/share/doc + - debian/iptables-dev.doc-base.netfilter-extensions, + debian/iptables-dev.doc-base.netfilter-hacking, + debian/iptables.doc-base.nat, debian/iptables.doc-base.packet-filter: + add howtos + - debian/iptables-dev.install: remove usr/share/man/man3 only used with + libipq manpages + + -- Stéphane Graber Thu, 05 Jun 2014 16:32:50 -0400 + iptables (1.4.21-2) unstable; urgency=medium * correct _dhopts var to enable autoreconf. Closes: #744968 @@ -39,6 +96,26 @@ -- Laurence J. Lane Mon, 19 May 2014 20:49:01 -0400 +iptables (1.4.21-1ubuntu1) trusty; urgency=medium + + * Merge from Debian unstable. 
Remaining changes: + - debian/control: add linuxdoc-tools dep, remove libipq references + - debian/rules: compile with --disable-libipq + - 9000-howtos.patch: add howtos/ and install them + - 9002-libxt_recent-Add-support-for-reap-option.patch: Some changes are + upstream, patch needed for additional reap option checks. + - debian/iptables.install: install NAT and packetfilter howtos into + /usr/share/doc + - debian/iptables-dev.doc-base.netfilter-extensions, + debian/iptables-dev.doc-base.netfilter-hacking, + debian/iptables.doc-base.nat, debian/iptables.doc-base.packet-filter: + add howtos + - debian/iptables-dev.install: remove usr/share/man/man3 only used with + libipq manpages + * Fix --with autoreconf in debian/rules + + -- Stéphane Graber Wed, 08 Jan 2014 17:20:40 -0500 + iptables (1.4.21-1) unstable; urgency=low * New upstream release @@ -61,6 +138,25 @@ -- Laurence J. Lane Sun, 01 Dec 2013 19:48:23 -0500 +iptables (1.4.20-2ubuntu1) trusty; urgency=low + + * Re-apply mistakenly dropped delta with Debian: + - debian/control: add linuxdoc-tools dep, remove libipq references + - debian/rules: compile with --disable-libipq + - 9000-howtos.patch: add howtos/ and install them + - 9002-libxt_recent-Add-support-for-reap-option.patch: Some changes are + upstream, patch needed for additional reap option checks. + - debian/iptables.install: install NAT and packetfilter howtos into + /usr/share/doc + - debian/iptables-dev.doc-base.netfilter-extensions, + debian/iptables-dev.doc-base.netfilter-hacking, + debian/iptables.doc-base.nat, debian/iptables.doc-base.packet-filter: + add howtos + - debian/iptables-dev.install: remove usr/share/man/man3 only used with + libipq manpages + + -- Stéphane Graber Wed, 23 Oct 2013 19:55:19 -0400 + iptables (1.4.20-2) unstable; urgency=low * Fixed man page installation so that @PACKAGE_VERSION@ is expanded. 
diff -Nru iptables-1.6.0/debian/control iptables-1.6.0/debian/control --- iptables-1.6.0/debian/control 2016-01-19 09:32:06.000000000 +0000 +++ iptables-1.6.0/debian/control 2016-02-19 15:13:49.000000000 +0000 @@ -1,7 +1,8 @@ Source: iptables Section: net Priority: important -Maintainer: iptables devel team +XSBC-Original-Maintainer: iptables devel team +Maintainer: Ubuntu Developers Uploaders: Laurence J. Lane , Arturo Borrero Gonzalez Build-Depends: autoconf, @@ -15,6 +16,7 @@ libnetfilter-conntrack3, libnfnetlink-dev, libnftnl-dev, + linuxdoc-tools, libtool (>= 2.2.6) Standards-Version: 3.9.6 Homepage: http://www.netfilter.org/ @@ -33,7 +35,9 @@ configuring the IPv6 packet filter Package: libxtables11 +Multi-Arch: same Architecture: linux-any +Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends} Replaces: iptables (<< 1.4.16.3-3) Breaks: iptables (<< 1.4.16.3-3) @@ -50,8 +54,8 @@ Description: iptables development files iptables is used to setup, maintain, and inspect the tables of packet filter rules in the Linux kernel. This package contains - the available library (libipq, libiptc, libxtables), header, - documentation and related files for iptables development. + the available library (libiptc, libxtables), header, documentation and related + files for iptables development. Package: iptables-nftables-compat Architecture: linux-any @@ -63,6 +67,7 @@ ${misc:Depends}, ${shlibs:Depends} Recommends: nftables +Replaces: iptables-dev (<< 1.6.0-2ubuntu3) Description: iptables compat tools for nftables this package includes the compat tools to load iptables, ip6tables, arptables and ebtables rules to the nf_tables kernel subsystem. 
diff -Nru iptables-1.6.0/debian/iptables-dev.doc-base.netfilter-extensions iptables-1.6.0/debian/iptables-dev.doc-base.netfilter-extensions --- iptables-1.6.0/debian/iptables-dev.doc-base.netfilter-extensions 1970-01-01 00:00:00.000000000 +0000 +++ iptables-1.6.0/debian/iptables-dev.doc-base.netfilter-extensions 2016-02-15 20:16:33.000000000 +0000 @@ -0,0 +1,9 @@ +Document: netfilter-extensions +Title: Netfilter Extensions HOWTO +Author: Fabrice MARIE +Abstract: This document describes how to install and use current iptables extensions for netfilter. +Section: Help/HOWTO + +Format: HTML +Index: /usr/share/doc/iptables-dev/html/netfilter-extensions-HOWTO.html +Files: /usr/share/doc/iptables-dev/html/netfilter-extensions-HOWTO-?.html diff -Nru iptables-1.6.0/debian/iptables-dev.doc-base.netfilter-hacking iptables-1.6.0/debian/iptables-dev.doc-base.netfilter-hacking --- iptables-1.6.0/debian/iptables-dev.doc-base.netfilter-hacking 1970-01-01 00:00:00.000000000 +0000 +++ iptables-1.6.0/debian/iptables-dev.doc-base.netfilter-hacking 2016-02-15 20:16:33.000000000 +0000 @@ -0,0 +1,9 @@ +Document: netfilter-hacking +Title: Linux netfilter Hacking HOWTO +Author: Rusty Russell +Abstract: This document describes the netfilter architecture for Linux, how to hack it, and some of the major systems which sit on top of it, such as packet filtering, connection tracking and Network Address Translation. 
+Section: Help/HOWTO + +Format: HTML +Index: /usr/share/doc/iptables-dev/html/netfilter-hacking-HOWTO.html +Files: /usr/share/doc/iptables-dev/html/netfilter-hacking-HOWTO-*.html diff -Nru iptables-1.6.0/debian/iptables-dev.install iptables-1.6.0/debian/iptables-dev.install --- iptables-1.6.0/debian/iptables-dev.install 2016-01-19 09:32:06.000000000 +0000 +++ iptables-1.6.0/debian/iptables-dev.install 2016-02-19 15:11:57.000000000 +0000 @@ -1,5 +1,5 @@ include/linux/netfilter_ipv4/ip_queue.h usr/include/linux/netfilter_ipv4/ -lib/lib*.so +lib/*-*/lib*.so lib/pkgconfig usr/lib usr/include -usr/share/man/man3 +howtos/netfilter*html usr/share/doc/iptables-dev/html diff -Nru iptables-1.6.0/debian/iptables.doc-base.nat iptables-1.6.0/debian/iptables.doc-base.nat --- iptables-1.6.0/debian/iptables.doc-base.nat 1970-01-01 00:00:00.000000000 +0000 +++ iptables-1.6.0/debian/iptables.doc-base.nat 2016-02-15 20:16:33.000000000 +0000 @@ -0,0 +1,11 @@ +Document: nat +Title: Linux 2.4/2.6 NAT HOWTO +Author: Rusty Russell +Abstract: This document describes how to do masquerading, transparent + proxying, port forwarding, and other forms of Network Address + Translations with the 2.6+ Linux Kernels. +Section: Help/HOWTO + +Format: HTML +Index: /usr/share/doc/iptables/html/NAT-HOWTO.html +Files: /usr/share/doc/iptables/html/NAT-HOWTO*.html diff -Nru iptables-1.6.0/debian/iptables.doc-base.packet-filter iptables-1.6.0/debian/iptables.doc-base.packet-filter --- iptables-1.6.0/debian/iptables.doc-base.packet-filter 1970-01-01 00:00:00.000000000 +0000 +++ iptables-1.6.0/debian/iptables.doc-base.packet-filter 2016-02-15 20:16:33.000000000 +0000 @@ -0,0 +1,10 @@ +Document: packet-filter +Title: Linux 2.4/2.6 Packet Filtering HOWTO +Author: Rusty Russell +Abstract: This document describes how to use iptables to filter + IP packets for the 2.6+ Linux kernels. 
+Section: Help/HOWTO + +Format: HTML +Index: /usr/share/doc/iptables/html/packet-filtering-HOWTO.html +Files: /usr/share/doc/iptables/html/packet-filtering-HOWTO*.html diff -Nru iptables-1.6.0/debian/iptables.install iptables-1.6.0/debian/iptables.install --- iptables-1.6.0/debian/iptables.install 2016-01-19 09:32:06.000000000 +0000 +++ iptables-1.6.0/debian/iptables.install 2016-02-15 20:26:42.000000000 +0000 @@ -1,6 +1,6 @@ iptables/iptables-apply usr/sbin iptables/iptables.xslt usr/share/iptables -lib/libip*.so.* +lib/*/libip*.so.* lib/xtables/libip*.so lib/xtables/libxt_*.so usr/sbin/ip6tables sbin @@ -13,3 +13,4 @@ usr/sbin/xtables-multi sbin usr/share/man/man1 usr/share/man/man8 +howtos/NAT*html debian/tmp/howtos/packet*html usr/share/doc/iptables/html diff -Nru iptables-1.6.0/debian/libxtables11.install iptables-1.6.0/debian/libxtables11.install --- iptables-1.6.0/debian/libxtables11.install 2016-01-19 09:32:06.000000000 +0000 +++ iptables-1.6.0/debian/libxtables11.install 2016-02-15 20:26:24.000000000 +0000 @@ -1 +1 @@ -lib/libxtables*.so.* +lib/*/libxtables*.so.* diff -Nru iptables-1.6.0/debian/patches/9000-howtos.patch iptables-1.6.0/debian/patches/9000-howtos.patch --- iptables-1.6.0/debian/patches/9000-howtos.patch 1970-01-01 00:00:00.000000000 +0000 +++ iptables-1.6.0/debian/patches/9000-howtos.patch 2016-02-15 20:16:33.000000000 +0000 @@ -0,0 +1,5760 @@ +Author: Soren Hansen +Description: Revert changes between 1.4.1.1-3 and 1.4.1.1-4, thus bringing back + the howtos. 
+Forwarded: no + +Index: iptables-1.4.12/howtos/Makefile +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ iptables-1.4.12/howtos/Makefile 2011-11-07 13:57:14.000000000 -0600 +@@ -0,0 +1,10 @@ ++all: ++ for i in *.sgml; do sgml2html $$i; done ++ ++install: ++ for i in *.html; do install -D -m 0644 $$i ${DESTDIR}/howtos/$$i; done ++ ++clean: ++ -rm *.html ++ ++.PHONY: all clean install +Index: iptables-1.4.12/howtos/NAT-HOWTO.sgml +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ iptables-1.4.12/howtos/NAT-HOWTO.sgml 2011-11-07 13:57:14.000000000 -0600 +@@ -0,0 +1,609 @@ ++ ++ ++ ++ ++ ++ ++
++ ++ ++ ++Linux 2.4 NAT HOWTO ++<author>Rusty Russell, mailing list <tt>netfilter@lists.samba.org</tt> ++<date>$Revision: 1.18 $ $Date: 2002/01/14 09:35:13 $ ++<abstract> ++This document describes how to do masquerading, transparent proxying, ++port forwarding, and other forms of Network Address Translations with ++the 2.4 Linux Kernels. ++</abstract> ++ ++<!-- Table of contents --> ++<toc> ++ ++<!-- Begin the document --> ++ ++<sect>Introduction<label id="intro"> ++ ++<p> ++Welcome, gentle reader. ++ ++<p> ++You are about to delve into the fascinating (and sometimes horrid) ++world of NAT: Network Address Translation, and this HOWTO is going to ++be your somewhat accurate guide to the 2.4 Linux Kernel and beyond. ++ ++<p>In Linux 2.4, an infrastructure for mangling packets was ++introduced, called `netfilter'. A layer on top of this provides NAT, ++completely reimplemented from previous kernels. ++ ++<p>(C) 2000 Paul `Rusty' Russell. Licensed under the GNU GPL. ++ ++<sect>Where is the official Web Site and List? ++ ++<p>There are three official sites: ++<itemize> ++<item>Thanks to <url url="http://netfilter.filewatcher.org/" name="Filewatcher">. ++<item>Thanks to <url url="http://netfilter.samba.org/" name="The Samba Team and SGI">. ++<item>Thanks to <url url="http://netfilter.gnumonks.org/" name="Harald Welte">. ++</itemize> ++ ++<p>You can reach all of them using round-robin DNS via ++<url url="http://www.netfilter.org/"> and <url url="http://www.iptables.org/"> ++ ++<p>For the official netfilter mailing list, see ++<url url="http://www.netfilter.org/contact.html#list" name="netfilter List">. ++ ++<sect1>What is Network Address Translation? ++ ++<p> ++Normally, packets on a network travel from their source (such as your ++home computer) to their destination (such as www.gnumonks.org) ++through many different links: about 19 from where I am in Australia. ++None of these links really alter your packet: they just send it ++onward. 
++ ++<p> ++If one of these links were to do NAT, then they would alter the source ++or destinations of the packet as it passes through. As you can ++imagine, this is not how the system was designed to work, and hence ++NAT is always something of a crock. Usually the link doing NAT will ++remember how it mangled a packet, and when a reply packet passes ++through the other way, it will do the reverse mangling on that reply ++packet, so everything works. ++ ++<sect1>Why Would I Want To Do NAT? ++ ++<p>In a perfect world, you wouldn't. Meanwhile, the main reasons are: ++ ++<descrip> ++<tag/Modem Connections To The Internet/ Most ISPs give you a single IP ++address when you dial up to them. You can send out packets with any ++source address you want, but only replies to packets with this source ++IP address will return to you. If you want to use multiple different ++machines (such as a home network) to connect to the Internet through ++this one link, you'll need NAT. ++ ++<p>This is by far the most common use of NAT today, commonly known as ++`masquerading' in the Linux world. I call this SNAT, because you ++change the <bf>source</bf> address of the first packet. ++ ++<tag/Multiple Servers/ Sometimes you want to change where packets ++heading into your network will go. Frequently this is because (as ++above), you have only one IP address, but you want people to be able ++to get into the boxes behind the one with the `real' IP address. If ++you rewrite the destination of incoming packets, you can manage this. ++This type of NAT was called port-forwarding under previous versions of ++Linux. ++ ++<p>A common variation of this is load-sharing, where the mapping ++ranges over a set of machines, fanning packets out to them. If you're ++doing this on a serious scale, you may want to look at ++ ++<url url="http://linuxvirtualserver.org/" name="Linux Virtual Server">. 
++ ++<tag/Transparent Proxying/ Sometimes you want to pretend that each ++packet which passes through your Linux box is destined for a program ++on the Linux box itself. This is used to make transparent proxies: a ++proxy is a program which stands between your network and the outside ++world, shuffling communication between the two. The transparent part ++is because your network won't even know it's talking to a proxy, ++unless of course, the proxy doesn't work. ++ ++<p>Squid can be configured to work this way, and it is called ++redirection or transparent proxying under previous Linux versions. ++</descrip> ++ ++<sect>The Two Types of NAT ++ ++<p>I divide NAT into two different types: <bf>Source NAT</bf> (SNAT) ++and <bf>Destination NAT</bf> (DNAT). ++ ++<p>Source NAT is when you alter the source address of the first ++packet: i.e. you are changing where the connection is coming from. ++Source NAT is always done post-routing, just before the packet goes ++out onto the wire. Masquerading is a specialized form of SNAT. ++ ++<p>Destination NAT is when you alter the destination address of the ++first packet: i.e. you are changing where the connection is going to. ++Destination NAT is always done before routing, when the packet first ++comes off the wire. Port forwarding, load sharing, and transparent ++proxying are all forms of DNAT. ++ ++<sect>Quick Translation From 2.0 and 2.2 Kernels ++ ++<p>Sorry to those of you still shell-shocked from the 2.0 (ipfwadm) to ++2.2 (ipchains) transition. There's good and bad news. ++ ++<p>Firstly, you can simply use ipchains and ipfwadm as before. To do ++this, you need to insmod the `ipchains.o' or `ipfwadm.o' kernel ++modules found in the latest netfilter distribution. These are ++mutually exclusive (you have been warned), and should not be combined ++with any other netfilter modules. 
++ ++<p>Once one of these modules is installed, you can use ipchains and ++ipfwadm as normal, with the following differences: ++ ++<itemize> ++<item> Setting the masquerading timeouts with ipchains -M -S, or ++ ipfwadm -M -s does nothing. Since the timeouts are longer for ++ the new NAT infrastructure, this should not matter. ++ ++<item> The init_seq, delta and previous_delta fields in the verbose ++ masquerade listing are always zero. ++ ++<item> Zeroing and listing the counters at the same time `-Z -L' does ++ not work any more: the counters will not be zeroed. ++ ++<item> The backward compatibility layer doesn't scale very well for ++ large numbers of connections: don't use it for your corporate ++ gateway! ++</itemize> ++ ++Hackers may also notice: ++ ++<itemize> ++<item> You can now bind to ports 61000-65095 even if you're ++ masquerading. The masquerading code used to assume anything ++ in this range was fair game, so programs couldn't use it. ++ ++<item> The (undocumented) `getsockname' hack, which transparent proxy ++ programs could use to find out the real destinations of ++ connections no longer works. ++ ++<item> The (undocumented) bind-to-foreign-address hack is also not ++ implemented; this was used to complete the illusion of ++ transparent proxying. ++ ++</itemize> ++ ++<sect1> I just want masquerading! Help! ++ ++<p>This is what most people want. If you have a dynamically allocated ++IP PPP dialup (if you don't know, this is you), you simply want to ++tell your box that all packets coming from your internal network ++should be made to look like they are coming from the PPP dialup box. ++ ++<tscreen><verb> ++# Load the NAT module (this pulls in all the others). ++modprobe iptable_nat ++ ++# In the NAT table (-t nat), Append a rule (-A) after routing ++# (POSTROUTING) for all packets going out ppp0 (-o ppp0) which says to ++# MASQUERADE the connection (-j MASQUERADE). 
++iptables -t nat -A POSTROUTING -o ppp0 -j MASQUERADE ++ ++# Turn on IP forwarding ++echo 1 > /proc/sys/net/ipv4/ip_forward ++</verb></tscreen> ++ ++Note that you are not doing any packet filtering here: for that, see ++the Packet Filtering HOWTO: `Mixing NAT and Packet Filtering'. ++ ++<sect1> What about ipmasqadm? ++ ++<p>This is a much more niche user base, so I didn't worry about ++backward compatibility as much. You can simply use `iptables -t nat' ++to do port forwarding. So for example, in Linux 2.2 you might have ++done: ++ ++<tscreen><verb> ++# Linux 2.2 ++# Forward TCP packets going to port 8080 on 1.2.3.4 to 192.168.1.1's port 80 ++ipmasqadm portfw -a -P tcp -L 1.2.3.4 8080 -R 192.168.1.1 80 ++</verb></tscreen> ++ ++Now you would do: ++ ++<tscreen><verb> ++# Linux 2.4 ++# Append a rule before routing (-A PREROUTING) to the NAT table (-t nat) that ++# TCP packets (-p tcp) going to 1.2.3.4 (-d 1.2.3.4) port 8080 (--dport 8080) ++# have their destination mapped (-j DNAT) to 192.168.1.1, port 80 ++# (--to 192.168.1.1:80). ++iptables -A PREROUTING -t nat -p tcp -d 1.2.3.4 --dport 8080 \ ++ -j DNAT --to 192.168.1.1:80 ++</verb></tscreen> ++ ++<sect>Controlling What To NAT ++ ++<p>You need to create NAT rules which tell the kernel what connections ++to change, and how to change them. To do this, we use the very ++versatile <tt>iptables</tt> tool, and tell it to alter the NAT table by ++specifying the `-t nat' option. ++ ++<p>The table of NAT rules contains three lists called `chains': each ++rule is examined in order until one matches. Two of the chains are ++called PREROUTING (for Destination NAT, as packets first come in), and ++POSTROUTING (for Source NAT, as packets leave). The third (OUTPUT) ++will be ignored here.
++ ++<p>The following diagram would illustrate it quite well if I had any ++artistic talent: ++ ++<tscreen><verb> ++ _____ _____ ++ / \ / \ ++ PREROUTING -->[Routing ]----------------->POSTROUTING-----> ++ \D-NAT/ [Decision] \S-NAT/ ++ | ^ ++ | | ++ | | ++ | | ++ | | ++ | | ++ | | ++ --------> Local Process ------ ++</verb></tscreen> ++ ++At each of the points above, when a packet passes we look up what ++connection it is associated with. If it's a new connection, we look ++up the corresponding chain in the NAT table to see what to do with it. ++The answer it gives will apply to all future packets on that ++connection. ++ ++<sect1>Simple Selection using iptables ++ ++<p><tt>iptables</tt> takes a number of standard options as listed ++below. All the double-dash options can be abbreviated, as long as ++<tt>iptables</tt> can still tell them apart from the other possible ++options. If your kernel has iptables support as a module, you'll need ++to load the ip_tables.o module first: `insmod ip_tables'. ++ ++<p>The most important option here is the table selection option, `-t'. ++For all NAT operations, you will want to use `-t nat' for the NAT ++table. The second most important option to use is `-A' to append a ++new rule at the end of the chain (e.g. `-A POSTROUTING'), or `-I' to ++insert one at the beginning (e.g. `-I PREROUTING'). ++ ++<p>You can specify the source (`-s' or `--source') and destination ++(`-d' or `--destination') of the packets you want to NAT. These ++options can be followed by a single IP address (e.g. 192.168.1.1), a ++name (e.g. www.gnumonks.org), or a network address ++(e.g. 192.168.1.0/24 or 192.168.1.0/255.255.255.0). ++ ++<p>You can specify the incoming (`-i' or `--in-interface') or outgoing ++(`-o' or `--out-interface') interface to match, but which you can ++specify depends on which chain you are putting the rule into: at ++PREROUTING you can only select incoming interface, and at POSTROUTING ++you can only select outgoing interface. 
If you use the ++wrong one, <tt>iptables</tt> will give an error. ++ ++<sect1>Finer Points Of Selecting What Packets To Mangle ++ ++<p>I said above that you can specify a source and destination address. ++If you omit the source address option, then any source address will ++do. If you omit the destination address option, then any destination ++address will do. ++ ++<p>You can also indicate a specific protocol (`-p' or `--protocol'), ++such as TCP or UDP; only packets of this protocol will match the rule. ++The main reason for doing this is that specifying a protocol of tcp or ++udp then allows extra options: specifically the `--source-port' and ++`--destination-port' options (abbreviated as `--sport' and `--dport'). ++ ++<p>These options allow you to specify that only packets with a certain ++source and destination port will match the rule. This is useful for ++redirecting web requests (TCP port 80 or 8080) and leaving other ++packets alone. ++ ++<p>These options must follow the `-p' option (which has a side-effect ++of loading the shared library extension for that protocol). You can ++use port numbers, or a name from the /etc/services file. ++ ++<p>All the different qualities you can select a packet by are detailed ++in painful detail in the manual page (<tt>man iptables</tt>). ++ ++<sect>Saying How To Mangle The Packets ++ ++<p>So now we know how to select the packets we want to mangle. To ++complete our rule, we need to tell the kernel exactly what we want it ++to do to the packets. ++ ++<sect1>Source NAT ++ ++<p>You want to do Source NAT; change the source address of connections ++to something different. This is done in the POSTROUTING chain, just ++before it is finally sent out; this is an important detail, since it ++means that anything else on the Linux box itself (routing, packet ++filtering) will see the packet unchanged. It also means that the `-o' ++(outgoing interface) option can be used. 
++ ++<p>Source NAT is specified using `-j SNAT', and the `--to-source' ++option specifies an IP address, a range of IP addresses, and an ++optional port or range of ports (for UDP and TCP protocols only). ++ ++<tscreen><verb> ++## Change source addresses to 1.2.3.4. ++# iptables -t nat -A POSTROUTING -o eth0 -j SNAT --to 1.2.3.4 ++ ++## Change source addresses to 1.2.3.4, 1.2.3.5 or 1.2.3.6 ++# iptables -t nat -A POSTROUTING -o eth0 -j SNAT --to 1.2.3.4-1.2.3.6 ++ ++## Change source addresses to 1.2.3.4, ports 1-1023 ++# iptables -t nat -A POSTROUTING -p tcp -o eth0 -j SNAT --to 1.2.3.4:1-1023 ++</verb></tscreen> ++ ++<sect2>Masquerading ++ ++<p>There is a specialized case of Source NAT called masquerading: it ++should only be used for dynamically-assigned IP addresses, such as ++standard dialups (for static IP addresses, use SNAT above). ++ ++<p>You don't need to put in the source address explicitly with ++masquerading: it will use the source address of the interface the ++packet is going out from. But more importantly, if the link goes ++down, the connections (which are now lost anyway) are forgotten, ++meaning fewer glitches when the connection comes back up with a new IP ++address. ++ ++<tscreen><verb> ++## Masquerade everything out ppp0. ++# iptables -t nat -A POSTROUTING -o ppp0 -j MASQUERADE ++</verb></tscreen> ++ ++<sect1>Destination NAT ++ ++<p>This is done in the PREROUTING chain, just as the packet comes in; ++this means that anything else on the Linux box itself (routing, packet ++filtering) will see the packet going to its `real' destination. It ++also means that the `-i' (incoming interface) option can be used. ++ ++<p>Destination NAT is specified using `-j DNAT', and the ++`--to-destination' option specifies an IP address, a range of IP ++addresses, and an optional port or range of ports (for UDP and TCP ++protocols only).
++ ++<tscreen><verb> ++## Change destination addresses to 5.6.7.8 ++# iptables -t nat -A PREROUTING -i eth0 -j DNAT --to 5.6.7.8 ++ ++## Change destination addresses to 5.6.7.8, 5.6.7.9 or 5.6.7.10. ++# iptables -t nat -A PREROUTING -i eth0 -j DNAT --to 5.6.7.8-5.6.7.10 ++ ++## Change destination addresses of web traffic to 5.6.7.8, port 8080. ++# iptables -t nat -A PREROUTING -p tcp --dport 80 -i eth0 \ ++ -j DNAT --to 5.6.7.8:8080 ++</verb></tscreen> ++ ++<sect2>Redirection ++ ++<p>There is a specialized case of Destination NAT called redirection: ++it is a simple convenience which is exactly equivalent to doing DNAT ++to the address of the incoming interface. ++ ++<tscreen><verb> ++## Send incoming port-80 web traffic to our squid (transparent) proxy ++# iptables -t nat -A PREROUTING -i eth1 -p tcp --dport 80 \ ++ -j REDIRECT --to-port 3128 ++</verb></tscreen> ++ ++Note that squid needs to be configured to know it's a transparent proxy! ++ ++<sect1>Mappings In Depth ++ ++<p>There are some subtleties to NAT which most people will never have ++to deal with. They are documented here for the curious. ++ ++<sect2>Selection Of Multiple Addresses in a Range ++ ++<p>If a range of IP addresses is given, the IP address to use is ++chosen based on the least currently used IP for connections the ++machine knows about. This gives primitive load-balancing. ++ ++<sect2>Creating Null NAT Mappings ++ ++<p>You can use the `-j ACCEPT' target to let a connection through ++without any NAT taking place. ++ ++<sect2>Standard NAT Behavior ++ ++<p>The default behavior is to alter the connection as little as ++possible, within the constraints of the rule given by the user. This ++means we won't remap ports unless we have to. ++ ++<sect2>Implicit Source Port Mapping ++ ++<p>Even when no NAT is requested for a connection, source port ++translation may occur implicitly, if another connection has been ++mapped over the new one. 
Consider the case of masquerading, which ++is rather common: ++ ++<enum> ++<item> A web connection is established by a box 192.1.1.1 from port ++ 1024 to www.netscape.com port 80. ++ ++<item> This is masqueraded by the masquerading box to use its source ++ IP address (1.2.3.4). ++ ++<item> The masquerading box tries to make a web connection to ++ www.netscape.com port 80 from 1.2.3.4 (its external interface ++ address) port 1024. ++ ++<item> The NAT code will alter the source port of the second ++ connection to 1025, so that the two don't clash. ++</enum> ++ ++<p>When this implicit source mapping occurs, ports are divided into ++three classes: ++<itemize> ++<item> Ports below 512 ++<item> Ports between 512 and 1023 ++<item> Ports 1024 and above. ++</itemize> ++ ++A port will never be implicitly mapped into a different class. ++ ++<sect2>What Happens When NAT Fails ++ ++<p>If there is no way to uniquely map a connection as the user ++requests, it will be dropped. This also applies to packets which ++could not be classified as part of any connection, because they are ++malformed, or the box is out of memory, etc. ++ ++<sect2>Multiple Mappings, Overlap and Clashes ++ ++<p>You can have NAT rules which map packets onto the same range; the ++NAT code is clever enough to avoid clashes. Hence having two rules ++which map the source address 192.168.1.1 and 192.168.1.2 respectively ++onto 1.2.3.4 is fine. ++ ++<p>Furthermore, you can map over real, used IP addresses, as long as ++those addresses pass through the mapping box as well. 
So if you have ++an assigned network (1.2.3.0/24), but have one internal network using ++those addresses and one using the Private Internet Addresses ++192.168.1.0/24, you can simply NAT the 192.168.1.0/24 source addresses ++onto the 1.2.3.0 network, without fear of clashing: ++ ++<tscreen><verb> ++# iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -o eth1 \ ++ -j SNAT --to 1.2.3.0/24 ++</verb></tscreen> ++ ++<p>The same logic applies to addresses used by the NAT box itself: ++this is how masquerading works (by sharing the interface address ++between masqueraded packets and `real' packets coming from the box ++itself). ++ ++<p>Moreover, you can map the same packets onto many different targets, ++and they will be shared. For example, if you don't want to map ++anything over 1.2.3.5, you could do: ++ ++<tscreen><verb> ++# iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -o eth1 \ ++ -j SNAT --to 1.2.3.0-1.2.3.4 --to 1.2.3.6-1.2.3.254 ++</verb></tscreen> ++ ++<sect2>Altering the Destination of Locally-Generated Connections ++ ++<p>The NAT code allows you to insert DNAT rules in the OUTPUT chain, ++but this is not fully supported in 2.4 (it can be, but it requires a ++new configuration option, some testing, and a fair bit of coding, so ++unless someone contracts Rusty to write it, I wouldn't expect it ++soon). ++ ++<p>The current limitation is that you can only change the destination ++to the local machine (e.g. `-j DNAT --to 127.0.0.1'), not to any other ++machine, otherwise the replies won't be translated correctly. ++ ++<sect>Special Protocols ++ ++<p>Some protocols do not like being NAT'ed. For each of these ++protocols, two extensions must be written; one for the connection ++tracking of the protocol, and one for the actual NAT. ++ ++<p>Inside the netfilter distribution, there are currently modules for ++ftp: ip_conntrack_ftp.o and ip_nat_ftp.o.
If you insmod these into ++your kernel (or you compile them in permanently), then doing any kind ++of NAT on ftp connections should work. If you don't, then you can ++only use passive ftp, and even that might not work reliably if you're ++doing more than simple Source NAT. ++ ++<sect>Caveats on NAT ++ ++<p>If you are doing NAT on a connection, all packets passing ++<bf>both</bf> ways (in and out of the network) must pass through the ++NAT'ed box, otherwise it won't work reliably. In particular, the ++connection tracking code reassembles fragments, which means that not ++only will connection tracking not be reliable, but your packets may ++not get through at all, as fragments will be withheld. ++ ++<sect>Source NAT and Routing ++ ++<p>If you are doing SNAT, you will want to make sure that every ++machine the SNAT'ed packets go to will send replies back to the NAT ++box. For example, if you are mapping some outgoing packets onto the ++source address 1.2.3.4, then the outside router must know that it is ++to send reply packets (which will have <bf>destination</bf> 1.2.3.4) ++back to this box. This can be done in the following ways: ++ ++<enum> ++<item> If you are doing SNAT onto the box's own address (for which ++ routing and everything already works), you don't need to do ++ anything. ++ ++<item> If you are doing SNAT onto an unused address on the local LAN ++ (for example, you're mapping onto 1.2.3.99, a free IP on your ++ 1.2.3.0/24 network), your NAT box will need to respond to ARP ++ requests for that address as well as its own: the easiest way ++ to do this is create an IP alias, e.g.: ++<tscreen><verb> ++# ip address add 1.2.3.99 dev eth0 ++</verb></tscreen> ++ ++<item> If you are doing SNAT onto a completely different address, you ++ will have to ensure that the machines the SNAT packets will hit ++ will route this address back to the NAT box.
This is already ++ achieved if the NAT box is their default gateway, otherwise you ++ will need to advertise a route (if running a routing protocol) ++ or manually add routes to each machine involved. ++</enum> ++ ++<sect>Destination NAT Onto the Same Network ++ ++<p>If you are doing port forwarding back onto the same network, you ++need to make sure that both future packets and reply packets pass ++through the NAT box (so they can be altered). The NAT code will now ++(since 2.4.0-test6), block the outgoing ICMP redirect which is ++produced when the NAT'ed packet heads out the same interface it came ++in on, but the receiving server will still try to reply directly to ++the client (which won't recognize the reply). ++ ++<p>The classic case is that internal staff try to access your `public' ++web server, which is actually DNAT'ed from the public address ++(1.2.3.4) to an internal machine (192.168.1.1), like so: ++ ++<tscreen><verb> ++# iptables -t nat -A PREROUTING -d 1.2.3.4 \ ++ -p tcp --dport 80 -j DNAT --to 192.168.1.1 ++</verb></tscreen> ++ ++<p>One way is to run an internal DNS server which knows the real ++(internal) IP address of your public web site, and forward all other ++requests to an external DNS server. This means that the logging on ++your web server will show the internal IP addresses correctly. ++ ++<p>The other way is to have the NAT box also map the source IP address ++to its own for these connections, fooling the server into replying ++through it. In this example, we would do the following (assuming the ++internal IP address of the NAT box is 192.168.1.250): ++ ++<tscreen><verb> ++# iptables -t nat -A POSTROUTING -d 192.168.1.1 -s 192.168.1.0/24 \ ++ -p tcp --dport 80 -j SNAT --to 192.168.1.250 ++</verb></tscreen> ++ ++Because the <bf>PREROUTING</bf> rule gets run first, the packets will ++already be destined for the internal web server: we can tell which ++ones are internally sourced by the source IP addresses. 
++ ++<sect>Thanks ++ ++<p>Thanks first to WatchGuard, and David Bonn, who believed in the ++netfilter idea enough to support me while I worked on it. ++ ++<p>And to everyone else who put up with my ranting as I learnt about ++the ugliness of NAT, especially those who read my diary. ++ ++<p>Rusty. ++</article> +Index: iptables-1.4.12/howtos/netfilter-extensions-HOWTO.sgml +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ iptables-1.4.12/howtos/netfilter-extensions-HOWTO.sgml 2011-11-07 13:57:14.000000000 -0600 +@@ -0,0 +1,1781 @@ ++<!doctype linuxdoc system> ++ ++<!-- This is the Netfilter Extensions HOWTO. ++ --> ++ ++<article> ++ ++<!-- Title information --> ++ ++<title>Netfilter Extensions HOWTO ++Fabrice MARIE <fabrice@netfilter.org>, mailing list netfilter-devel@lists.samba.org ++$Revision: 1.28 $ ++ ++This document describes how to install and use current iptables extensions for netfilter. ++ ++ ++ ++ ++ ++ ++ ++Introduction
+Index: iptables-1.4.12/howtos/netfilter-hacking-HOWTO.sgml +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ iptables-1.4.12/howtos/netfilter-hacking-HOWTO.sgml 2011-11-07 13:57:14.000000000 -0600 +@@ -0,0 +1,1978 @@ ++ ++ ++ ++ ++ ++ ++
++ ++ ++ ++Linux netfilter Hacking HOWTO ++<author>Rusty Russell and Harald Welte, mailing list <tt>netfilter@lists.samba.org</tt> ++<date>$Revision: 1.14 $ $Date: 2002/07/02 04:07:19 $ ++<abstract> ++This document describes the netfilter architecture for Linux, how to ++hack it, and some of the major systems which sit on top of it, such as ++packet filtering, connection tracking and Network Address Translation. ++</abstract> ++ ++<!-- Table of contents --> ++<toc> ++ ++<!-- Begin the document --> ++ ++<sect>Introduction<label id="intro"> ++ ++<p> ++Hi guys. ++ ++<p> ++This document is a journey; some parts are well-traveled, and in ++other areas you will find yourself almost alone. The best advice I ++can give you is to grab a large, cozy mug of coffee or hot chocolate, ++get into a comfortable chair, and absorb the contents before venturing ++out into the sometimes dangerous world of network hacking. ++ ++<p>For more understanding of the use of the infrastructure on top of ++the netfilter framework, I recommend reading the Packet Filtering ++HOWTO and the NAT HOWTO. For information on kernel programming I ++suggest Rusty's Unreliable Guide to Kernel Hacking and Rusty's ++Unreliable Guide to Kernel Locking. ++ ++<p>(C) 2000 Paul `Rusty' Russell. Licenced under the GNU GPL. ++ ++<sect1>What is netfilter? ++ ++<p> ++netfilter is a framework for packet mangling, outside the normal ++Berkeley socket interface. It has four parts. Firstly, each protocol ++defines "hooks" (IPv4 defines 5) which are well-defined points in a ++packet's traversal of that protocol stack. At each of these points, ++the protocol will call the netfilter framework with the packet and the ++hook number. ++ ++<p> ++Secondly, parts of the kernel can register to listen to the different ++hooks for each protocol. 
So when a packet is passed to the netfilter ++framework, it checks to see if anyone has registered for that protocol ++and hook; if so, they each get a chance to examine (and possibly ++alter) the packet in order, then discard the packet ++(<tt>NF_DROP</tt>), allow it to pass (<tt>NF_ACCEPT</tt>), tell ++netfilter to forget about the packet (<tt>NF_STOLEN</tt>), or ask ++netfilter to queue the packet for userspace (<tt>NF_QUEUE</tt>). ++ ++<p> ++The third part is that packets that have been queued are collected (by ++the ip_queue driver) for sending to userspace; these packets are ++handled asynchronously. ++ ++<p> ++The final part consists of cool comments in the code and ++documentation. This is instrumental for any experimental project. ++The netfilter motto is (stolen shamelessly from Cort Dougan): ++ ++<tscreen><verb> ++ ``So... how is this better than KDE?'' ++</verb></tscreen> ++ ++<p>(This motto narrowly edged out `Whip me, beat me, make me use ++ipchains'). ++ ++<p> ++In addition to this raw framework, various modules have been written ++which provide functionality similar to previous (pre-netfilter) ++kernels, in particular, an extensible NAT system, and an extensible ++packet filtering system (iptables). ++ ++<sect1>What's wrong with what we had in 2.0 and 2.2? ++ ++<p> ++<enum> ++<item>No infrastructure established for passing packet to userspace: ++<itemize> ++<item>Kernel coding is hard ++<item>Kernel coding must be done in C/C++ ++<item>Dynamic filtering policies do not belong in kernel ++<item> 2.2 introduced copying packets to userspace via netlink, but ++ reinjecting packets is slow, and subject to `sanity' checks. ++ For example, reinjecting packet claiming to come from an ++ existing interface is not possible. 
++</itemize> ++ ++<item>Transparent proxying is a crock: ++ ++<itemize> ++ ++<item> We look up <bf>every</bf> packet to see if there is a socket ++bound to that address ++ ++<item> Root is allowed to bind to foreign addresses ++ ++<item> Can't redirect locally-generated packets ++ ++<item> REDIRECT doesn't handle UDP replies: redirecting UDP named ++packets to 1153 doesn't work because some clients don't like replies ++coming from anything other than port 53. ++ ++<item> REDIRECT doesn't coordinate with tcp/udp port allocation: a ++user may get a port shadowed by a REDIRECT rule. ++ ++<item>Has been broken at least twice during 2.1 series. ++ ++<item>Code is extremely intrusive. Consider the stats on the number ++of #ifdef CONFIG_IP_TRANSPARENT_PROXY in 2.2.1: 34 occurrences in 11 ++files. Compare this with CONFIG_IP_FIREWALL, which has 10 occurrences ++in 5 files. ++</itemize> ++ ++<item>Creating packet filter rules independent of interface addresses ++ is not possible: ++ ++<itemize> ++<item>Must know local interface addresses to distinguish ++locally-generated or locally-terminating packets from through ++packets. ++ ++<item>Even that is not enough in cases of redirection or ++masquerading. ++ ++<item>Forward chain only has information on outgoing interface, ++meaning you have to figure where a packet came from using knowledge of ++the network topology. ++</itemize> ++ ++<item>Masquerading is tacked onto packet filtering:<p> ++ Interactions between packet filtering and masquerading make firewalling ++ complex: ++<itemize> ++<item>At input filtering, reply packets appear to be destined for box itself ++<item>At forward filtering, demasqueraded packets are not seen at all ++<item>At output filtering, packets appear to come from local box ++</itemize> ++ ++<item>TOS manipulation, redirect, ICMP unreachable and mark (which can ++affect port forwarding, routing, and QoS) are tacked onto packet ++filter code as well.
++ ++<item>ipchains code is neither modular, nor extensible (eg. MAC ++address filtering, options filtering, etc). ++ ++<item>Lack of sufficient infrastructure has led to a profusion of ++different techniques: ++<itemize> ++<item>Masquerading, plus per-protocol modules ++<item>Fast static NAT by routing code (doesn't have per-protocol handling) ++<item>Port forwarding, redirect, auto forwarding ++<item>The Linux NAT and Virtual Server Projects. ++</itemize> ++ ++<item>Incompatibility between CONFIG_NET_FASTROUTE and packet filtering: ++<itemize> ++<item>Forwarded packets traverse three chains anyway ++<item>No way to tell if these chains can be bypassed ++</itemize> ++ ++<item>Inspection of packets dropped due to routing protection ++(eg. Source Address Verification) not possible. ++ ++<item>No way of atomically reading counters on packet filter rules. ++ ++<item>CONFIG_IP_ALWAYS_DEFRAG is a compile-time option, making life ++difficult for distributions who want one general-purpose kernel. ++ ++</enum> ++ ++<sect1>Who are you? ++ ++<p> ++I'm the only one foolish enough to do this. As ipchains co-author and ++current Linux Kernel IP Firewall maintainer, I see many of the ++problems that people have with the current system, as well as getting ++exposure to what they are trying to do. ++ ++<sect1>Why does it crash? ++ ++<p> ++Woah! You should have seen it <bf>last</bf> week! ++ ++<p> ++Because I'm not as great a programmer as we might all wish, and I ++certainly haven't tested all scenarios, because of lack of time, ++equipment and/or inspiration. I do have a testsuite, which I ++encourage you to contribute to. ++ ++<sect>Where Can I Get The Latest? ++ ++<p>There is a CVS server on netfilter.org which contains the latest ++HOWTOs, userspace tools and testsuite. For casual browsing, you ++can use the ++<url url="http://cvs.netfilter.org/" name="Web Interface">. 
++ ++To grab the latest sources, you can do the following: ++ ++<enum> ++<item> Log in to the netfilter CVS server anonymously: ++<tscreen><verb> ++cvs -d :pserver:cvs@pserver.netfilter.org:/cvspublic login ++</verb></tscreen> ++<item> When it asks you for a password type `cvs'. ++<item> Check out the code using: ++<tscreen><verb> ++# cvs -d :pserver:cvs@pserver.netfilter.org:/cvspublic co netfilter/userspace ++</verb></tscreen> ++<item> To update to the latest version, use ++<tscreen><verb> ++cvs update -d -P ++</verb></tscreen> ++</enum> ++ ++<sect>Netfilter Architecture ++ ++<p>Netfilter is merely a series of hooks in various points in a ++protocol stack (at this stage, IPv4, IPv6 and DECnet). The ++(idealized) IPv4 traversal diagram looks like the following: ++ ++<tscreen><verb> ++A Packet Traversing the Netfilter System: ++ ++ --->[1]--->[ROUTE]--->[3]--->[4]---> ++ | ^ ++ | | ++ | [ROUTE] ++ v | ++ [2] [5] ++ | ^ ++ | | ++ v | ++</verb></tscreen><label id="netfilter-traversal"> ++ ++On the left is where packets come in: having passed the simple sanity ++checks (i.e., not truncated, IP checksum OK, not a promiscuous receive), ++they are passed to the netfilter framework's NF_IP_PRE_ROUTING [1] hook. ++ ++<p> ++Next they enter the routing code, which decides whether the packet is ++destined for another interface, or a local process. The routing code ++may drop packets that are unroutable. ++ ++<p> ++If it's destined for the box itself, the netfilter framework is called ++again for the NF_IP_LOCAL_IN [2] hook, before being passed to the ++process (if any). ++ ++<p> ++If it's destined to pass to another interface instead, the netfilter ++framework is called for the NF_IP_FORWARD [3] hook. ++ ++<p> ++The packet then passes a final netfilter hook, the NF_IP_POST_ROUTING ++[4] hook, before being put on the wire again. ++ ++<p> ++The NF_IP_LOCAL_OUT [5] hook is called for packets that are created ++locally. 
Here you can see that routing occurs after this hook is ++called: in fact, the routing code is called first (to figure out the ++source IP address and some IP options): if you want to alter the ++routing, you must alter the `skb->dst' field yourself, as is done in ++the NAT code. ++ ++<sect1>Netfilter Base ++<p> ++Now we have an example of netfilter for IPv4, you can see when each ++hook is activated. This is the essence of netfilter. ++ ++<p> ++Kernel modules can register to listen at any of these hooks. A module ++that registers a function must specify the priority of the function ++within the hook; then when that netfilter hook is called from the core ++networking code, each module registered at that point is called in the ++order of priorities, and is free to manipulate the packet. The ++module can then tell netfilter to do one of five things: ++ ++<enum> ++<item> NF_ACCEPT: continue traversal as normal. ++<item> NF_DROP: drop the packet; don't continue traversal. ++<item> NF_STOLEN: I've taken over the packet; don't continue traversal. ++<item> NF_QUEUE: queue the packet (usually for userspace handling). ++<item> NF_REPEAT: call this hook again. ++</enum> ++ ++<p> ++The other parts of netfilter (handling queued packets, cool comments) ++will be covered in the kernel section later. ++ ++<p> ++Upon this foundation, we can build fairly complex packet ++manipulations, as shown in the next two sections. ++ ++<sect1>Packet Selection: IP Tables ++<p> ++A packet selection system called IP Tables has been built over the ++netfilter framework. It is a direct descendant of ipchains (that came ++from ipfwadm, that came from BSD's ipfw IIRC), with extensibility. ++Kernel modules can register a new table, and ask for a packet to ++traverse a given table. This packet selection method is used for ++packet filtering (the `filter' table), Network Address Translation ++(the `nat' table) and general pre-route packet mangling (the `mangle' ++table).
++ ++<p>The hooks that are registered with netfilter are as follows (with ++the functions in each hook in the order that they are actually ++called): ++ ++<tscreen><verb> ++ ++ --->PRE------>[ROUTE]--->FWD---------->POST------> ++ Conntrack | Mangle ^ Mangle ++ Mangle | Filter | NAT (Src) ++ NAT (Dst) | | Conntrack ++ (QDisc) | [ROUTE] ++ v | ++ IN Filter OUT Conntrack ++ | Conntrack ^ Mangle ++ | Mangle | NAT (Dst) ++ v | Filter ++</verb></tscreen> ++ ++<sect2>Packet Filtering ++ ++<p> ++This table, `filter', should never alter packets: only filter them. ++ ++<p> ++One of the advantages of iptables filter over ipchains is that it is ++small and fast, and it hooks into netfilter at the NF_IP_LOCAL_IN, ++NF_IP_FORWARD and NF_IP_LOCAL_OUT points. This means that for any ++given packet, there is one (and only one) possible place to filter it. ++This makes things much simpler for users than ipchains was. Also, the ++fact that the netfilter framework provides both the input and output ++interfaces for the NF_IP_FORWARD hook means that many kinds of ++filtering are far simpler. ++ ++<p> ++Note: I have ported the kernel portions of both ipchains and ipfwadm ++as modules on top of netfilter, enabling the use of the old ipfwadm ++and ipchains userspace tools without requiring an upgrade. ++ ++<sect2>NAT ++ ++<p> ++This is the realm of the `nat' table, which is fed packets from two ++netfilter hooks: for non-local packets, the NF_IP_PRE_ROUTING and ++NF_IP_POST_ROUTING hooks are perfect for destination and source ++alterations respectively. If CONFIG_IP_NF_NAT_LOCAL is defined, the ++hooks NF_IP_LOCAL_OUT and NF_IP_LOCAL_IN are used for altering the ++destination of local packets. ++ ++<p> ++This table is slightly different from the `filter' table, in that only ++the first packet of a new connection will traverse the table: the ++result of this traversal is then applied to all future packets in the ++same connection. 
++ ++<sect3>Masquerading, Port Forwarding, Transparent Proxying ++ ++<p>I divide NAT into Source NAT (where the first packet has its source ++altered), and Destination NAT (the first packet has its destination ++altered). ++ ++<p>Masquerading is a special form of Source NAT: port forwarding and ++transparent proxying are special forms of Destination NAT. These are ++now all done using the NAT framework, rather than being independent ++entities. ++ ++<sect2>Packet Mangling ++ ++<p>The packet mangling table (the `mangle' table) is used for actual ++changing of packet information. Example applications are the TOS and ++TCPMSS targets. The mangle table hooks into all five netfilter hooks. ++(please note this changed with kernel 2.4.18. Previous kernels didn't ++have mangle attached to all hooks) ++ ++<sect1>Connection Tracking ++<p> ++Connection tracking is fundamental to NAT, but it is implemented as a ++separate module; this allows an extension to the packet filtering code ++to simply and cleanly use connection tracking (the `state' module). ++ ++<sect1>Other Additions ++ ++<p>The new flexibility provides both the opportunity to do really ++funky things, and for people to write enhancements or complete ++replacements that can be mixed and matched. ++ ++<sect>Information for Programmers ++ ++<p>I'll let you in on a secret: my pet hamster did all the coding. I ++was just a channel, a `front' if you will, in my pet's grand plan. ++So, don't blame me if there are bugs. Blame the cute, furry one. ++ ++<sect1>Understanding ip_tables ++ ++<p>iptables simply provides a named array of rules in memory (hence ++the name `iptables'), and such information as where packets from each ++hook should begin traversal. After a table is registered, userspace ++can read and replace its contents using getsockopt() and setsockopt().
++ ++<p>iptables does not register with any netfilter hooks: it relies on ++other modules to do that and feed it the packets as appropriate; a ++module must register the netfilter hooks and ip_tables separately, and ++provide the mechanism to call ip_tables when the hook is reached. ++ ++<sect2> ip_tables Data Structures ++ ++<p>For convenience, the same data structure is used to represent a ++rule by userspace and within the kernel, although a few fields are ++only used inside the kernel. ++ ++<p>Each rule consists of the following parts: ++<enum> ++<item> A `struct ipt_entry'. ++<item> Zero or more `struct ipt_entry_match' structures, each with a ++ variable amount (0 or more bytes) of data appended to it. ++<item> A `struct ipt_entry_target' structure, with a variable amount ++ (0 or more bytes) of data appended to it. ++</enum> ++ ++The variable nature of the rule gives a huge amount of flexibility for ++extensions, as we'll see, especially as each match or target can carry ++an arbitrary amount of data. This does create a few traps, however: ++we have to watch out for alignment. We do this by ensuring that the ++`ipt_entry', `ipt_entry_match' and `ipt_entry_target' structures are ++conveniently sized, and that all data is rounded up to the maximal ++alignment of the machine using the IPT_ALIGN() macro. ++ ++<p> ++The `struct ipt_entry' has the following fields: ++<enum> ++<item> A `struct ipt_ip' part, containing the specifications for the ++IP header that it is to match. ++ ++<item> An `nf_cache' bitfield showing what parts of the packet this ++rule examined. ++ ++<item> A `target_offset' field indicating the offset from the ++beginning of this rule where the ipt_entry_target structure begins. ++This should always be aligned correctly (with the IPT_ALIGN macro). ++ ++<item> A `next_offset' field indicating the total size of this rule, ++including the matches and target. This should also be aligned ++correctly using the IPT_ALIGN macro. 
++ ++<item> A `comefrom' field used by the kernel to track packet ++traversal. ++ ++<item> A `struct ipt_counters' field containing the packet and byte ++counters for packets which matched this rule. ++</enum> ++ ++<p> ++The `struct ipt_entry_match' and `struct ipt_entry_target' are very ++similar, in that they contain a total (IPT_ALIGN'ed) length field ++(`match_size' and `target_size' respectively) and a union holding the ++name of the match or target (for userspace), and a pointer (for the ++kernel). ++ ++<p> ++Because of the tricky nature of the rule data structure, some helper ++routines are provided: ++ ++<descrip> ++<tag>ipt_get_target()</tag> This inline function returns a pointer to ++the target of a rule. ++ ++<tag>IPT_MATCH_ITERATE()</tag> This macro calls the given function for ++every match in the given rule. The function's first argument is the ++`struct ipt_entry_match', and other arguments (if any) are those ++supplied to the IPT_MATCH_ITERATE() macro. The function must return ++either zero for the iteration to continue, or a non-zero value to ++stop. ++ ++<tag>IPT_ENTRY_ITERATE()</tag> This function takes a pointer to an ++entry, the total size of the table of entries, and a function to call. ++The function's first argument is the `struct ipt_entry', and other ++arguments (if any) are those supplied to the IPT_ENTRY_ITERATE() ++macro. The function must return either zero for the iteration to ++continue, or a non-zero value to stop. ++</descrip> ++ ++<sect2>ip_tables From Userspace ++ ++<p>Userspace has four operations: it can read the current table, read ++the info (hook positions and size of table), replace the table (and ++grab the old counters), and add in new counters. ++ ++<p>This allows any atomic operation to be simulated by userspace: this ++is done by the libiptc library, which provides convenience ++"add/delete/replace" semantics for programs.
++ ++<p>Because these tables are transferred into kernel space, alignment ++becomes an issue for machines which have different userspace and ++kernelspace type rules (eg. Sparc64 with 32-bit userland). These ++cases are handled by overriding the definition of IPT_ALIGN for these ++platforms in `libiptc.h'. ++ ++<sect2> ip_tables Use And Traversal ++ ++<p>The kernel starts traversing at the location indicated by the ++particular hook. That rule is examined, if the `struct ipt_ip' ++elements match, each `struct ipt_entry_match' is checked in turn (the ++match function associated with that match is called). If the match ++function returns 0, iteration stops on that rule. If it sets the ++`hotdrop' parameter to 1, the packet will also be immediately dropped ++(this is used for some suspicious packets, such as in the tcp match ++function). ++ ++<p>If the iteration continues to the end, the counters are ++incremented, the `struct ipt_entry_target' is examined: if it's a ++standard target, the `verdict' field is read (negative means a packet ++verdict, positive means an offset to jump to). If the answer is ++positive and the offset is not that of the next rule, the `back' ++variable is set, and the previous `back' value is placed in that ++rule's `comefrom' field. ++ ++<p>For non-standard targets, the target function is called: it returns ++a verdict (non-standard targets can't jump, as this would break the ++static loop-detection code). The verdict can be IPT_CONTINUE, to ++continue on to the next rule. ++ ++<sect1>Extending iptables ++ ++<p>Because I'm lazy, <tt>iptables</tt> is fairly extensible. This is ++basically a scam to palm off work onto other people, which is what ++Open Source is all about (cf. Free Software, which as RMS would say, ++is about freedom, and I was sitting in one of his talks when I wrote ++this). 
++ ++<p>Extending <tt>iptables</tt> potentially involves two parts: ++extending the kernel, by writing a new module, and possibly extending ++the userspace program <tt>iptables</tt>, by writing a new shared ++library. ++ ++<sect2>The Kernel ++ ++<p>Writing a kernel module itself is fairly simple, as you can see ++from the examples. One thing to be aware of is that your code must be ++re-entrant: there can be one packet coming in from userspace, while ++another arrives on an interrupt. In fact in SMP there can be one ++packet on an interrupt per CPU in 2.3.4 and above. ++ ++<p> ++The functions you need to know about are: ++ ++<descrip> ++<tag>init_module()</tag> This is the entry-point of the module. It ++returns a negative error number, or 0 if it successfully registers ++itself with netfilter. ++ ++<tag>cleanup_module()</tag> This is the exit point of the module; it ++should unregister itself with netfilter. ++ ++<tag>ipt_register_match()</tag> This is used to register a new match ++type. You hand it a `struct ipt_match', which is usually declared as ++a static (file-scope) variable. ++ ++<tag>ipt_register_target()</tag> This is used to register a new target ++type. You hand it a `struct ipt_target', which is usually declared as ++a static (file-scope) variable. ++ ++<tag>ipt_unregister_target()</tag> Used to unregister your target. ++ ++<tag>ipt_unregister_match()</tag> Used to unregister your match. ++</descrip> ++ ++<p>One warning about doing tricky things (such as providing counters) ++in the extra space in your new match or target. On SMP machines, the ++entire table is duplicated using memcpy for each CPU: if you really ++want to keep central information, you should see the method used in ++the `limit' match. ++ ++<sect3>New Match Functions ++ ++<p>New match functions are usually written as a standalone module. ++It's possible to have these modules extensible in turn, although it's ++usually not necessary.
One way would be to use the netfilter ++framework's `nf_register_sockopt' function to allow users to talk to ++your module directly. Another way would be to export symbols for ++other modules to register themselves, the same way netfilter and ++ip_tables do. ++ ++<p>The core of your new match function is the struct ipt_match which ++it passes to `ipt_register_match()'. This structure has the following ++fields: ++ ++<descrip> ++<tag>list</tag> This field is set to any junk, say `{ NULL, NULL }'. ++ ++<tag>name</tag> This field is the name of the match function, as ++referred to by userspace. The name should match the name of the ++module (i.e., if the name is "mac", the module must be "ipt_mac.o") for ++auto-loading to work. ++ ++<tag>match</tag> This field is a pointer to a match function, which ++takes the skb, the in and out device pointers (one of which may be ++NULL, depending on the hook), a pointer to the match data in the rule ++that is worked on (the structure that was prepared in userspace), the ++IP offset (non-zero means ++a non-head fragment), a pointer to the protocol header (i.e., just ++past the IP header), the length of the data (ie. the packet length ++minus the IP header length) and finally a pointer to a `hotdrop' ++variable. It should return non-zero if the packet matches, and can ++set `hotdrop' to 1 if it returns 0, to indicate that the packet must ++be dropped immediately. ++ ++<tag>checkentry</tag> This field is a pointer to a function which ++checks the specifications for a rule; if this returns 0, then the rule ++will not be accepted from the user. For example, the "tcp" match type ++will only accept tcp packets, and so if the `struct ipt_ip' part of ++the rule does not specify that the protocol must be tcp, a zero is ++returned.
The tablename argument allows your match to control what ++tables it can be used in, and the `hook_mask' is a bitmask of hooks ++this rule may be called from: if your match does not make sense from ++some netfilter hooks, you can avoid that here. ++ ++<tag>destroy</tag> This field is a pointer to a function which is ++called when an entry using this match is deleted. This allows you to ++dynamically allocate resources in checkentry and clean them up here. ++ ++<tag>me</tag> This field is set to `THIS_MODULE', which gives a ++pointer to your module. It causes the usage-count to go up and down ++as rules of that type are created and destroyed. This prevents a user ++removing the module (and hence cleanup_module() being called) if a ++rule refers to it. ++</descrip> ++ ++<sect3>New Targets ++ ++<p>If your target alters the packet (ie. the headers or the body), it ++must call skb_unshare() to copy the packet in case it is cloned: ++otherwise any raw sockets which have a clone of the skbuff will see ++the alterations (ie. people will see weird stuff happening in ++tcpdump). ++ ++<p>New targets are also usually written as a standalone module. The ++discussions under the above section on `New Match Functions' apply ++equally here. ++ ++<p>The core of your new target is the struct ipt_target that it ++passes to ipt_register_target(). This structure has the following ++fields: ++ ++ <descrip> ++ <tag>list</tag> This field is set to any junk, say `{ NULL, NULL }'. ++ ++ <tag>name</tag> This field is the name of the target function, as ++ referred to by userspace. The name should match the name of the ++ module (i.e., if the name is "REJECT", the module must be ++ "ipt_REJECT.o") for auto-loading to work. ++ ++ <tag>target</tag> This is a pointer to the target function, which ++ takes the skbuff, the hook number, the input and output device ++ pointers (either of which may be NULL), a pointer to the target data, ++ and the position of the rule in the table.
The target function may ++ return either IPT_CONTINUE (-1) if traversing should continue, or a ++ netfilter verdict (NF_DROP, NF_ACCEPT, NF_STOLEN etc.). ++ ++ <tag>checkentry</tag> This field is a pointer to a function which ++ checks the specifications for a rule; if this returns 0, then the ++ rule will not be accepted from the user. ++ ++ <tag>destroy</tag> This field is a pointer to a function which is ++ called when an entry using this target is deleted. This allows you ++ to dynamically allocate resources in checkentry and clean them up ++ here. ++ ++ <tag>me</tag> This field is set to `THIS_MODULE', which gives a ++ pointer to your module. It causes the usage-count to go up and down ++ as rules with this as a target are created and destroyed. This ++ prevents a user removing the module (and hence cleanup_module() being ++ called) if a rule refers to it. ++ </descrip> ++ ++<sect3>New Tables ++ ++<p>You can create a new table for your specific purpose if you wish. ++To do this, you call `ipt_register_table()', with a `struct ++ipt_table', which has the following fields: ++ ++ <descrip> ++ <tag>list</tag> This field is set to any junk, say `{ NULL, NULL }'. ++ ++ <tag>name</tag> This field is the name of the table function, as ++ referred to by userspace. The name should match the name of the ++ module (i.e., if the name is "nat", the module must be ++ "iptable_nat.o") for auto-loading to work. ++ ++ <tag>table</tag> This is a fully-populated `struct ipt_replace', as ++ used by userspace to replace a table. The `counters' pointer should ++ be set to NULL. This data structure can be declared `__initdata' so ++ it is discarded after boot. ++ ++ <tag>valid_hooks</tag> This is a bitmask of the IPv4 netfilter hooks ++ you will enter the table with: this is used to check that those entry ++ points are valid, and to calculate the possible hooks for ipt_match ++ and ipt_target `checkentry()' functions. 
++ ++ <tag>lock</tag> This is the read-write spinlock for the entire table; ++ initialize it to RW_LOCK_UNLOCKED. ++ ++ <tag>private</tag> This is used internally by the ip_tables code. ++ </descrip> ++ ++<sect2>Userspace Tool ++ ++<p>Now you've written your nice shiny kernel module, you may want to ++control the options on it from userspace. Rather than have a branched ++version of <tt>iptables</tt> for each extension, I use the very latest ++90's technology: furbies. Sorry, I mean shared libraries. ++ ++<p>New tables generally don't require any extension to ++<tt>iptables</tt>: the user just uses the `-t' option to make it use ++the new table. ++ ++<p>The shared library should have an `_init()' function, which will ++automatically be called upon loading: the moral equivalent of the ++kernel module's `init_module()' function. This should call ++`register_match()' or `register_target()', depending on whether your ++shared library provides a new match or a new target. ++ ++<p>You need to provide a shared library: this can be used to ++initialize part of the structure, or provide additional options. I ++now insist on a shared library even if it doesn't do anything, to ++reduce problem reports where the shared libraries are missing. ++ ++<p>There are useful functions described in the `iptables.h' header, ++especially: ++<descrip> ++<tag>check_inverse()</tag> checks if an argument is actually a `!', ++and if so, sets the `invert' flag if not already set. If it returns ++true, you should increment optind, as done in the examples. ++ ++<tag>string_to_number()</tag> converts a string into a number in the ++given range, returning -1 if it is malformed or out of range. ++`string_to_number' relies on `strtol' (see the manpage), meaning ++that a leading "0x" makes the number be read as hexadecimal, and a leading ++"0" makes it be read as octal. ++ ++<tag>exit_error()</tag> should be called if an error is found. 
++Usually the first argument is `PARAMETER_PROBLEM', meaning the user ++didn't use the command line correctly. ++</descrip> ++ ++<sect3>New Match Functions ++ ++<p>Your shared library's _init() function hands `register_match()' a ++pointer to a static `struct iptables_match', which has the following ++fields: ++ ++<descrip> ++<tag>next</tag> This pointer is used to make a linked list of matches ++(such as used for listing rules). It should be set to NULL initially. ++ ++<tag>name</tag> The name of the match function. This should match the ++library name (eg "tcp" for `libipt_tcp.so'). ++ ++<tag>version</tag> Usually set to the IPTABLES_VERSION macro: this is ++used to ensure that the <tt>iptables</tt> binary doesn't pick up the ++wrong shared libraries by mistake. ++ ++<tag>size</tag> The size of the match data for this match; you should ++use the IPT_ALIGN() macro to ensure it is correctly aligned. ++ ++<tag>userspacesize</tag> For some matches, the kernel changes some ++fields internally (the `limit' target is a case of this). This means ++that a simple `memcmp()' is insufficient to compare two rules ++(required for delete-matching-rule functionality). If this is the ++case, place all the fields which do not change at the start of the ++structure, and put the size of the unchanging fields here. Usually, ++however, this will be identical to the `size' field. ++ ++<tag>help</tag> A function which prints out the option synopsis. ++ ++<tag>init</tag> This can be used to initialize the extra space (if ++any) in the ipt_entry_match structure, and set any nfcache bits; if ++you are examining something not expressible using the contents of ++`linux/include/netfilter_ipv4.h', then simply OR in the NFC_UNKNOWN ++bit. It will be called before `parse()'. ++ ++<tag>parse</tag> This is called when an unrecognized option is seen on ++the command line: it should return non-zero if the option was indeed ++for your library. `invert' is true if a `!' has already been seen. 
++The `flags' pointer is for the exclusive use of your match library, ++and is usually used to store a bitmask of options which have been ++specified. Make sure you adjust the nfcache field. You may extend ++the size of the `ipt_entry_match' structure by reallocating if ++necessary, but then you must ensure that the size is passed through ++the IPT_ALIGN macro. ++ ++<tag>final_check</tag> This is called after the command line has been ++parsed, and is handed the `flags' integer reserved for your library. ++This gives you a chance to check that any compulsory options have been ++specified, for example: call `exit_error()' if this is not the case. ++ ++<tag>print</tag> This is used by the chain listing code to print (to ++standard output) the extra match information (if any) for a rule. The ++numeric flag is set if the user specified the `-n' flag. ++ ++<tag>save</tag> This is the reverse of parse: it is used by ++`iptables-save' to reproduce the options which created the rule. ++ ++<tag>extra_opts</tag> This is a NULL-terminated list of extra options ++which your library offers. This is merged with the current options ++and handed to getopt_long; see the man page for details. The return ++code for getopt_long becomes the first argument (`c') to your ++`parse()' function. ++</descrip> ++ ++There are extra elements at the end of this structure for use ++internally by <tt>iptables</tt>: you don't need to set them. ++ ++<sect3>New Targets ++ ++<p>Your shared library's _init() function hands `register_target()' ++a pointer to a static `struct iptables_target', which has similar ++fields to the iptables_match structure detailed above. ++ ++<sect2>Using `libiptc' ++ ++<p><tt>libiptc</tt> is the iptables control library, designed for ++listing and manipulating rules in the iptables kernel module. While ++its current use is for the iptables program, it makes writing other ++tools fairly easy. You need to be root to use these functions. 
++ ++<p>The kernel tables themselves are simply a table of rules, and a set ++of numbers representing entry points. Chain names ("INPUT", etc) are ++provided as an abstraction by the library. User defined chains are ++labelled by inserting an error node before the head of the ++user-defined chain, which contains the chain name in the extra data ++section of the target (the builtin chain positions are defined by the ++three table entry points). ++ ++<p>The following standard targets are supported: ACCEPT, DROP, QUEUE ++(which are translated to NF_ACCEPT, NF_DROP, and NF_QUEUE, ++respectively), RETURN (which is translated to a special IPT_RETURN ++value handled by ip_tables), and JUMP (which is translated from the ++chain name to an actual offset within the table). ++ ++<p>When `iptc_init()' is called, the table, including the counters, is ++read. This table is manipulated by the `iptc_insert_entry()', ++`iptc_replace_entry()', `iptc_append_entry()', `iptc_delete_entry()', ++`iptc_delete_num_entry()', `iptc_flush_entries()', ++`iptc_zero_entries()', `iptc_create_chain()' `iptc_delete_chain()', ++and `iptc_set_policy()' functions. ++ ++<p>The table changes are not written back until the `iptc_commit()' ++function is called. This means it is possible for two library users ++operating on the same chain to race each other; locking would be ++required to prevent this, and it is not currently done. ++ ++<p>There is no race with counters, however; counters are added back in ++to the kernel in such a way that counter increments between the ++reading and writing of the table still show up in the new table. ++ ++<p>There are various helper functions: ++ ++<descrip> ++<tag>iptc_first_chain()</tag> This function returns the first chain ++name in the table. ++ ++<tag>iptc_next_chain()</tag> This function returns the next chain name ++in the table: NULL means no more chains. ++ ++<tag>iptc_builtin()</tag> Returns true if the given chain name is the ++name of a builtin chain. 
++ ++<tag>iptc_first_rule()</tag> This returns a pointer to the first rule ++in the given chain name: NULL for an empty chain. ++ ++<tag>iptc_next_rule()</tag> This returns a pointer to the next rule in ++the chain: NULL means the end of the chain. ++ ++<tag>iptc_get_target()</tag> This gets the target of the given rule. If ++it's an extended target, the name of that target is returned. If it's ++a jump to another chain, the name of that chain is returned. If it's ++a verdict (eg. DROP), that name is returned. If it has no target (an ++accounting-style rule), then the empty string is returned. ++ ++<p>Note that this function should be used instead of using the value ++of the `verdict' field of the ipt_entry structure directly, as it ++offers the above further interpretations of the standard verdict. ++ ++<tag>iptc_get_policy()</tag> This gets the policy of a builtin chain, ++and fills in the `counters' argument with the hit statistics on that ++policy. ++ ++<tag>iptc_strerror()</tag> This function returns a more meaningful ++explanation of a failure code in the iptc library. If a function ++fails, it will always set errno: this value can be passed to ++iptc_strerror() to yield an error message. ++</descrip> ++ ++<sect1>Understanding NAT ++ ++<p>Welcome to Network Address Translation in the kernel. Note that ++the infrastructure offered is designed more for completeness than raw ++efficiency, and that future tweaks may increase the efficiency ++markedly. For the moment I'm happy that it works at all. ++ ++<p>NAT is separated into connection tracking (which doesn't manipulate ++packets at all), and the NAT code itself. Connection tracking is also ++designed to be used by an iptables module, so it makes subtle ++distinctions in states which NAT doesn't care about. ++ ++<sect2>Connection Tracking ++ ++<p>Connection tracking hooks into high-priority NF_IP_LOCAL_OUT and ++NF_IP_PRE_ROUTING hooks, in order to see packets before they enter the ++system. 
++ ++<p>The nfct field in the skb is a pointer to inside the struct ++ip_conntrack, at one of the infos[] array. Hence we can tell the ++state of the skb by which element in this array it is pointing to: ++this pointer encodes both the state structure and the relationship of ++this skb to that state. ++ ++<p>The best way to extract the `nfct' field is to call ++`ip_conntrack_get()', which returns NULL if it's not set, or the ++connection pointer, and fills in ctinfo which describes the ++relationship of the packet to that connection. This enumerated type ++has several values: ++ ++<descrip> ++ ++<tag>IP_CT_ESTABLISHED</tag> The packet is part of an established ++connection, in the original direction. ++ ++<tag>IP_CT_RELATED</tag> The packet is related to the connection, and ++is passing in the original direction. ++ ++<tag>IP_CT_NEW</tag> The packet is trying to create a new connection ++(obviously, it is in the original direction). ++ ++<tag>IP_CT_ESTABLISHED + IP_CT_IS_REPLY</tag> The packet is part of an ++established connection, in the reply direction. ++ ++<tag>IP_CT_RELATED + IP_CT_IS_REPLY</tag> The packet is related to the ++connection, and is passing in the reply direction. ++</descrip> ++ ++Hence a reply packet can be identified by testing for >= ++IP_CT_IS_REPLY. ++ ++<sect1>Extending Connection Tracking/NAT ++ ++<p>These frameworks are designed to accommodate any number of protocols ++and different mapping types. Some of these mapping types might be ++quite specific, such as a load-balancing/fail-over mapping type. ++ ++<p>Internally, connection tracking converts a packet to a "tuple", ++representing the interesting parts of the packet, before searching for ++bindings or rules which match it. This tuple has a manipulatable ++part, and a non-manipulatable part; called "src" and "dst", as this is ++the view for the first packet in the Source NAT world (it'd be a reply ++packet in the Destination NAT world). 
The tuple for every packet in ++the same packet stream in that direction is the same. ++ ++<p>For example, a TCP packet's tuple contains the manipulatable part: ++source IP and source port, the non-manipulatable part: destination IP ++and the destination port. The manipulatable and non-manipulatable ++parts do not need to be the same type though; for example, an ICMP ++packet's tuple contains the manipulatable part: source IP and the ICMP ++id, and the non-manipulatable part: the destination IP and the ICMP ++type and code. ++ ++<p>Every tuple has an inverse, which is the tuple of the reply packets ++in the stream. For example, the inverse of an ICMP ping packet, icmp ++id 12345, from 192.168.1.1 to 1.2.3.4, is a ping-reply packet, icmp id ++12345, from 1.2.3.4 to 192.168.1.1. ++ ++<p>These tuples, represented by the `struct ip_conntrack_tuple', are used ++widely. In fact, together with the hook the packet came in on (which ++has an effect on the type of manipulation expected), and the device ++involved, this is the complete information on the packet. ++ ++<p>Most tuples are contained within a `struct ++ip_conntrack_tuple_hash', which adds a doubly linked list entry, and a ++pointer to the connection that the tuple belongs to. ++ ++<p>A connection is represented by the `struct ip_conntrack': it has ++two `struct ip_conntrack_tuple_hash' fields: one referring to the ++direction of the original packet (tuplehash[IP_CT_DIR_ORIGINAL]), and ++one referring to packets in the reply direction ++(tuplehash[IP_CT_DIR_REPLY]). ++ ++<p>Anyway, the first thing the NAT code does is to see if the ++connection tracking code managed to extract a tuple and find an ++existing connection, by looking at the skbuff's nfct field; this tells ++us if it's an attempt on a new connection, or if not, which direction ++it is in; in the latter case, then the manipulations determined ++previously for that connection are done. 
++ ++<p>If it was the start of a new connection, we look for a rule for that ++tuple, using the standard iptables traversal mechanism, on the `nat' ++table. If a rule matches, it is used to initialize the manipulations ++for both that direction and the reply; the connection-tracking code is ++told that the reply it should expect has changed. Then, it's ++manipulated as above. ++ ++<p>If there is no rule, a `null' binding is created: this usually does ++not map the packet, but exists to ensure we don't map another stream ++over an existing one. Sometimes, the null binding cannot be created, ++because we have already mapped an existing stream over it, in which ++case the per-protocol manipulation may try to remap it, even though ++it's nominally a `null' binding. ++ ++<sect2>Standard NAT Targets ++ ++<p>NAT targets are like any other iptables target extensions, except ++they insist on being used only in the `nat' table. Both the SNAT and ++DNAT targets take a `struct ip_nat_multi_range' as their extra data; ++this is used to specify the range of addresses a mapping is allowed to ++bind into. A range element, `struct ip_nat_range' consists of an ++inclusive minimum and maximum IP address, and an inclusive maximum and ++minimum protocol-specific value (eg. TCP ports). There is also room ++for flags, which say whether the IP address can be mapped (sometimes ++we only want to map the protocol-specific part of a tuple, not the ++IP), and another to say that the protocol-specific part of the range ++is valid. ++ ++<p>A multi-range is an array of these `struct ip_nat_range' elements; ++this means that a range could be "1.1.1.1-1.1.1.2 ports 50-55 AND ++1.1.1.3 port 80". Each range element adds to the range (a union, for ++those who like set theory). ++ ++<sect2>New Protocols ++ ++<sect3> Inside The Kernel ++ ++<p>Implementing a new protocol first means deciding what the ++manipulatable and non-manipulatable parts of the tuple should be. 
++Everything in the tuple has the property that it identifies the stream ++uniquely. The manipulatable part of the tuple is the part you can do ++NAT with: for TCP this is the source port, for ICMP it's the icmp ID; ++something to use as a "stream identifier". The non-manipulatable part ++is the rest of the packet that uniquely identifies the stream, but we ++can't play with (eg. TCP destination port, ICMP type). ++ ++<p>Once you've decided this, you can write an extension to the ++connection-tracking code in the directory, and go about populating the ++`ip_conntrack_protocol' structure which you need to pass to ++`ip_conntrack_register_protocol()'. ++ ++<p>The fields of `struct ip_conntrack_protocol' are: ++ ++<descrip> ++<tag>list</tag> Set it to '{ NULL, NULL }'; used to sew you into the list. ++ ++<tag>proto</tag> Your protocol number; see `/etc/protocols'. ++ ++<tag>name</tag> The name of your protocol. This is the name the user ++will see; it's usually best if it's the canonical name in ++`/etc/protocols'. ++ ++<tag>pkt_to_tuple</tag> The function which fills out the protocol ++specific parts of the tuple, given the packet. The `datah' pointer ++points to the start of your header (just past the IP header), and the ++datalen is the length of the packet. If the packet isn't long enough ++to contain the header information, return 0; datalen will always be ++at least 8 bytes though (enforced by framework). ++ ++<tag>invert_tuple</tag> This function is simply used to change the ++protocol-specific part of the tuple into the way a reply to that ++packet would look. ++ ++<tag>print_tuple</tag> This function is used to print out the ++protocol-specific part of a tuple; usually it's sprintf()'d into the ++buffer provided. The number of buffer characters used is returned. ++This is used to print the states for the /proc entry. 
++ ++<tag>print_conntrack</tag> This function is used to print the private ++part of the conntrack structure, if any, also used for printing the ++states in /proc. ++ ++<tag>packet</tag> This function is called when a packet is seen which ++is part of an established connection. You get a pointer to the ++conntrack structure, the IP header, the length, and the ctinfo. You ++return a verdict for the packet (usually NF_ACCEPT), or -1 if the ++packet is not a valid part of the connection. You can delete the ++connection inside this function if you wish, but you must use the ++following idiom to avoid races (see ip_conntrack_proto_icmp.c): ++ ++<tscreen><verb> ++if (del_timer(&ct->timeout)) ++ ct->timeout.function((unsigned long)ct); ++</verb></tscreen> ++ ++<tag>new</tag> This function is called when a packet creates a ++connection for the first time; there is no ctinfo arg, since the first ++packet is of ctinfo IP_CT_NEW by definition. It returns 0 to fail to ++create the connection, or a connection timeout in jiffies. ++</descrip> ++ ++Once you've written and tested that you can track your new protocol, ++it's time to teach NAT how to translate it. This means writing a new ++module; an extension to the NAT code and go about populating the ++`ip_nat_protocol' structure which you need to pass to ++`ip_nat_protocol_register()'. ++ ++<descrip> ++<tag>list</tag> Set it to '{ NULL, NULL }'; used to sew you into the list. ++ ++<tag>name</tag> The name of your protocol. This is the name the user ++will see; it's best if it's the canonical name in `/etc/protocols' for ++userspace auto-loading, as we'll see later. ++ ++<tag>protonum</tag> Your protocol number; see `/etc/protocols'. ++ ++<tag>manip_pkt</tag> This is the other half of connection tracking's ++pkt_to_tuple function: you can think of it as "tuple_to_pkt". There ++are some differences though: you get a pointer to the start of the IP ++header, and the total packet length. 
This is because some protocols ++(UDP, TCP) need to know the IP header. You're given the ++ip_nat_tuple_manip field from the tuple (i.e., the "src" field), rather ++than the entire tuple, and the type of manipulation you are to ++perform. ++ ++<tag>in_range</tag> This function is used to tell if manipulatable ++part of the given tuple is in the given range. This function is a bit ++tricky: we're given the manipulation type which has been applied to ++the tuple, which tells us how to interpret the range (is it a source ++range or a destination range we're aiming for?). ++ ++<p>This function is used to check if an existing mapping puts us in ++the right range, and also to check if no manipulation is necessary at ++all. ++ ++<tag>unique_tuple</tag> This function is the core of NAT: given a ++tuple and a range, we're to alter the per-protocol part of the tuple ++to place it within the range, and make it unique. If we can't find an ++unused tuple in the range, return 0. We also get a pointer to the ++conntrack structure, which is required for ip_nat_used_tuple(). ++ ++<p>The usual approach is to simply iterate the per-protocol part of ++the tuple through the range, checking `ip_nat_used_tuple()' on it, ++until one returns false. ++ ++<p>Note that the null-mapping case has already been checked: it's ++either outside the range given, or already taken. ++ ++<p>If IP_NAT_RANGE_PROTO_SPECIFIED isn't set, it means that the user ++is doing NAT, not NAPT: do something sensible with the range. If no ++mapping is desirable (for example, within TCP, a destination mapping ++should not change the TCP port unless ordered to), return 0. ++ ++<tag>print</tag> Given a character buffer, a match tuple and a mask, ++write out the per-protocol parts and return the length of the buffer ++used. ++ ++<tag>print_range</tag> Given a character buffer and a range, write out ++the per-protocol part of the range, and return the length of the ++buffer used. 
This won't be called if the IP_NAT_RANGE_PROTO_SPECIFIED ++flag wasn't set for the range. ++</descrip> ++ ++<sect2>New NAT Targets ++ ++<p>This is the really interesting part. You can write new NAT targets ++which provide a new mapping type: two extra targets are provided in ++the default package: MASQUERADE and REDIRECT. These are fairly simple ++to illustrate the potential and power of writing a new NAT target. ++ ++<p>These are written just like any other iptables targets, but ++internally they will extract the connection and call ++`ip_nat_setup_info()'. ++ ++<sect2>Protocol Helpers ++ ++<p>Protocol helpers for connection tracking allow the connection ++tracking code to understand protocols which use multiple network ++connections (eg. FTP) and mark the `child' connections as being ++related to the initial connection, usually by reading the related ++address out of the data stream. ++ ++<p>Protocol helpers for NAT do two things: firstly allow the NAT code ++to manipulate the data stream to change the address contained within ++it, and secondly to perform NAT on the related connection when it ++comes in, based on the original connection. ++ ++<sect2>Connection Tracking Helper Modules ++ ++<sect3>Description ++<p> ++The duty of a connection tracking module is to specify which packets ++belong to an already established connection. The module has the ++following means to do that: ++ ++<itemize> ++<item>Tell netfilter which packets our module is interested in (most ++helpers operate on a particular port). ++ ++<item>Register a function with netfilter. This function is called for ++every packet which matches the criteria above. 
++ ++<item>An `ip_conntrack_expect_related()' function which can be called ++from there to tell netfilter to expect related connections.</item> ++</itemize> ++ ++<p> ++If there is some additional work to be done at the time the first packet ++of the expected connection arrives, the module can register a callback ++function which is called at that time. ++ ++<sect3>Structures and Functions Available ++ ++<p>Your kernel module's init function has to call ++`ip_conntrack_helper_register()' with a pointer to a ++`struct ip_conntrack_helper'. This struct has the following fields: ++ ++<descrip> ++<tag>list</tag>This is the header for the linked list. Netfilter ++handles this list internally. Just initialize it with `{ NULL, NULL }'. ++ ++<tag>name</tag>This is a pointer to a string constant specifying the ++name of the protocol. ("ftp", "irc", ...) ++ ++<tag>flags</tag>A set of flags with one or more out of the following flags: ++<itemize> ++<item>IP_CT_HELPER_F_REUSE_EXPECT : Reuse expectations if the limit (see ++`max_expected' below) is reached.</item> ++</itemize> ++ ++<tag>me</tag>A pointer to the module structure of the helper. Initialize this with the `THIS_MODULE' macro. ++ ++<tag>max_expected</tag>Maximum number of unconfirmed (outstanding) expectations. ++ ++<tag>timeout</tag>Timeout (in seconds) for each unconfirmed expectation. An expectation is deleted `timeout' seconds after the expectation was issued with the `ip_conntrack_expect_related()' function. ++ ++<tag>tuple</tag>This is a `struct ip_conntrack_tuple' which specifies ++the packets our conntrack helper module is interested in. ++ ++<tag>mask</tag>Again a `struct ip_conntrack_tuple'. This mask ++specifies which bits of <tt>tuple</tt> are valid. 
++ ++<tag>help</tag>The function which netfilter should call for each ++packet matching tuple+mask ++</descrip> ++ ++<sect3>Example skeleton of a conntrack helper module ++<p> ++<tscreen><code> ++#define FOO_PORT 111 ++ ++static int foo_expectfn(struct ip_conntrack *new) ++{ ++ /* called when the first packet of an expected ++ connection arrives */ ++ ++ return 0; ++} ++ ++static int foo_help(const struct iphdr *iph, size_t len, ++ struct ip_conntrack *ct, ++ enum ip_conntrack_info ctinfo) ++{ ++ /* analyze the data passed on this connection and ++ decide how related packets will look like */ ++ ++ /* update per master-connection private data ++ (session state, ...) */ ++ ct->help.ct_foo_info = ... ++ ++ if (there_will_be_new_packets_related_to_this_connection) ++ { ++ struct ip_conntrack_expect exp; ++ ++ memset(&exp, 0, sizeof(exp)); ++ exp.t = tuple_specifying_related_packets; ++ exp.mask = mask_for_above_tuple; ++ exp.expectfn = foo_expectfn; ++ exp.seq = tcp_sequence_number_of_expectation_cause; ++ ++ /* per slave-connection private data */ ++ exp.help.exp_foo_info = ... 
++ ++ ip_conntrack_expect_related(ct, &exp); ++ } ++ return NF_ACCEPT; ++} ++ ++static struct ip_conntrack_helper foo; ++ ++static int __init init(void) ++{ ++ memset(&foo, 0, sizeof(struct ip_conntrack_helper)); ++ ++ foo.name = "foo"; ++ foo.flags = IP_CT_HELPER_F_REUSE_EXPECT; ++ foo.me = THIS_MODULE; ++ foo.max_expected = 1; /* one expectation at a time */ ++ foo.timeout = 0; /* expectation never expires */ ++ ++ /* we are interested in all TCP packets with destport 111 */ ++ foo.tuple.dst.protonum = IPPROTO_TCP; ++ foo.tuple.dst.u.tcp.port = htons(FOO_PORT); ++ foo.mask.dst.protonum = 0xFFFF; ++ foo.mask.dst.u.tcp.port = 0xFFFF; ++ foo.help = foo_help; ++ ++ return ip_conntrack_helper_register(&foo); ++} ++ ++static void __exit fini(void) ++{ ++ ip_conntrack_helper_unregister(&foo); ++} ++</code></tscreen> ++ ++ ++<sect2>NAT helper modules ++ ++<sect3>Description ++<p> ++NAT helper modules do some application-specific NAT handling. Usually ++this includes on-the-fly manipulation of data: think about the PORT ++command in FTP, where the client tells the server which IP/port to ++connect to. Therefore an FTP helper module must replace the IP/port ++after the PORT command in the FTP control connection. ++ ++<p> ++If we are dealing with TCP, things get slightly more complicated. The ++reason is a possible change of the packet size (FTP example: the ++length of the string representing an IP/port tuple after the PORT ++command has changed). If we change the packet size, we have a syn/ack ++difference between left and right side of the NAT box. (i.e. if we had ++extended one packet by 4 octets, we have to add this offset to the TCP ++sequence number of each following packet). ++ ++<p> ++Special NAT handling of all related packets is required, too. 
Take as ++example again FTP, where all incoming packets of the DATA connection ++have to be NATed to the IP/port given by the client with the PORT ++command on the control connection, rather than going through the ++normal table lookup. ++ ++<itemize> ++<item>callback for the packet causing the related connection (foo_help) ++<item>callback for all related packets (foo_nat_expected) ++</itemize> ++ ++<sect3>Structures and Functions Available ++ ++<p>Your nat helper module's `init()' function calls ++`ip_nat_helper_register()' with a pointer to a `struct ++ip_nat_helper'. This struct has the following members: ++ ++<descrip> ++<tag>list</tag>Just again the list header for netfilters internal use. ++Initialize this with { NULL, NULL }. ++ ++<tag>name</tag>A pointer to a string constant with the protocol's name ++ ++<tag>flags</tag>A set out of zero, one or more of the following flags: ++<itemize> ++<item>IP_NAT_HELPER_F_ALWAYS : Call the NAT helper for every packet, ++not only for packets where conntrack has detected an expectation-cause.</item> ++<item>IP_NAT_HELPER_F_STANDALONE : Tell the NAT core that this protocol ++doesn't have a conntrack helper, only a NAT helper.</item> ++</itemize> ++ ++<tag>me</tag>A pointer to the module structure of the helper. Initialize ++this using the `THIS_MODULE' macro. ++ ++<tag>tuple</tag>a `struct ip_conntrack_tuple' describing which packets ++our NAT helper is interested in. ++ ++<tag>mask</tag>a `struct ip_conntrack_tuple', telling netfilter which ++bits of <tt>tuple</tt> are valid. ++ ++<tag>help</tag>The help function which is called for each packet ++matching tuple+mask. ++ ++<tag>expect</tag>The expect function which is called for every first ++packet of an expected connection. ++ ++</descrip> ++ ++This is very similar to writing a connection tracking helper. 
++ ++<sect3>Example NAT helper module ++<p> ++<tscreen><code> ++#define FOO_PORT 111 ++ ++static int foo_nat_expected(struct sk_buff **pksb, ++ unsigned int hooknum, ++ struct ip_conntrack *ct, ++ struct ip_nat_info *info) ++/* called whenever the first packet of a related connection arrives. ++ params: pksb packet buffer ++ hooknum HOOK the call comes from (POST_ROUTING, PRE_ROUTING) ++ ct information about this (the related) connection ++ info &ct->nat.info ++ return value: Verdict (NF_ACCEPT, ...) ++*/ ++{ ++ /* Change ip/port of the packet to the masqueraded ++ values (read from master->tuplehash), to map it the same way, ++ call ip_nat_setup_info, return NF_ACCEPT. */ ++ ++} ++ ++static int foo_help(struct ip_conntrack *ct, ++ struct ip_conntrack_expect *exp, ++ struct ip_nat_info *info, ++ enum ip_conntrack_info ctinfo, ++ unsigned int hooknum, ++ struct sk_buff **pksb) ++/* called for every packet where conntrack detected an expectation-cause ++ params: ct struct ip_conntrack of the master connection ++ exp struct ip_conntrack_expect of the expectation ++ caused by the conntrack helper for this protocol ++ info (STATE: related, new, established, ... ) ++ hooknum HOOK the call comes from (POST_ROUTING, PRE_ROUTING) ++ pksb packet buffer ++*/ ++{ ++ ++ /* extract information about future related packets (you can ++ share information with the connection tracking's foo_help). 
++ Exchange address/port with masqueraded values, insert tuple ++ about related packets */ ++} ++ ++static struct ip_nat_helper hlpr; ++ ++static int __init init(void) ++{ ++ int ret; ++ ++ memset(&hlpr, 0, sizeof(struct ip_nat_helper)); ++ hlpr.list = { NULL, NULL }; ++ hlpr.tuple.dst.protonum = IPPROTO_TCP; ++ hlpr.tuple.dst.u.tcp.port = htons(FOO_PORT); ++ hlpr.mask.dst.protonum = 0xFFFF; ++ hlpr.mask.dst.u.tcp.port = 0xFFFF; ++ hlpr.help = foo_help; ++ hlpr.expect = foo_nat_expected; ++ ++ ret = ip_nat_helper_register(&hlpr); ++ ++ return ret; ++} ++ ++static void __exit fini(void) ++{ ++ ip_nat_helper_unregister(&hlpr); ++} ++</code></tscreen> ++ ++<sect1>Understanding Netfilter ++ ++<p>Netfilter is pretty simple, and is described fairly thoroughly in ++the previous sections. However, sometimes it's necessary to go ++beyond what the NAT or ip_tables infrastructure offers, or you may ++want to replace them entirely. ++ ++<p>One important issue for netfilter (well, in the future) is caching. ++Each skb has an `nfcache' field: a bitmask of what fields in the ++header were examined, and whether the packet was altered or not. The ++idea is that each hook off netfilter OR's in the bits relevant to it, ++so that we can later write a cache system which will be clever enough ++to realize when packets do not need to be passed through netfilter at ++all. ++ ++<p>The most important bits are NFC_ALTERED, meaning the packet was ++altered (this is already used for IPv4's NF_IP_LOCAL_OUT hook, to ++reroute altered packets), and NFC_UNKNOWN, which means caching should ++not be done because some property which cannot be expressed was ++examined. If in doubt, simply set the NFC_UNKNOWN flag on the skb's ++nfcache field inside your hook. ++ ++<sect1>Writing New Netfilter Modules ++ ++<sect2> Plugging Into Netfilter Hooks ++ ++<p> To receive/mangle packets inside the kernel, you can simply write ++a module which registers a "netfilter hook". 
This is basically an ++expression of interest at some given point; the actual points are ++protocol-specific, and defined in protocol-specific netfilter headers, ++such as "netfilter_ipv4.h". ++ ++<p> To register and unregister netfilter hooks, you use the functions ++`nf_register_hook' and `nf_unregister_hook'. These each take a ++pointer to a `struct nf_hook_ops', which you populate as follows: ++ ++<descrip> ++<tag>list</tag> Used to sew you into the linked list: set to '{ NULL, ++NULL }' ++ ++<tag>hook</tag> The function which is called when a packet hits this ++hook point. Your function must return NF_ACCEPT, NF_DROP or NF_QUEUE. ++If NF_ACCEPT, the next hook attached to that point will be called. If ++NF_DROP, the packet is dropped. If NF_QUEUE, it's queued. You ++receive a pointer to an skb pointer, so you can entirely replace the ++skb if you wish. ++ ++<tag>flush</tag> Currently unused: designed to pass on packet hits ++when the cache is flushed. May never be implemented: set it to NULL. ++ ++<tag>pf</tag> The protocol family, eg, `PF_INET' for IPv4. ++ ++<tag>hooknum</tag> The number of the hook you are interested in, eg ++`NF_IP_LOCAL_OUT'. ++</descrip> ++ ++<sect2> Processing Queued Packets ++ ++<p>This interface is currently used by ip_queue; you can register to ++handle queued packets for a given protocol. This has similar semantics ++to registering for a hook, except you can block processing the packet, ++and you only see packets for which a hook has replied `NF_QUEUE'. ++ ++<p>The two functions used to register interest in queued packets are ++`nf_register_queue_handler()' and `nf_unregister_queue_handler()'. The ++function you register will be called with the `void *' pointer you ++handed it to `nf_register_queue_handler()'. ++ ++<p> ++If no-one is registered to handle a protocol, then returning NF_QUEUE ++is equivalent to returning NF_DROP. ++ ++<p> ++Once you have registered interest in queued packets, they begin ++queueing. 
You can do whatever you want with them, but you must call ++`nf_reinject()' when you are finished with them (don't simply ++kfree_skb() them). When you reinject an skb, you hand it the skb, the ++`struct nf_info' which your queue handler was given, and a verdict: ++NF_DROP causes them to be dropped, NF_ACCEPT causes them to continue ++to iterate through the hooks, NF_QUEUE causes them to be queued again, ++and NF_REPEAT causes the hook which queued the packet to be consulted ++again (beware infinite loops). ++ ++<p>You can look inside the `struct nf_info' to get auxiliary ++information about the packet, such as the interfaces and hook it was ++on. ++ ++<sect2> Receiving Commands From Userspace ++ ++<p>It is common for netfilter components to want to interact with ++userspace. The method for doing this is by using the setsockopt ++mechanism. Note that each protocol must be modified to call ++nf_setsockopt() for setsockopt numbers it doesn't understand (and ++nf_getsockopt() for getsockopt numbers), and so far only IPv4, IPv6 ++and DECnet have been modified. ++ ++<p>Using a now-familiar technique, we register a `struct ++nf_sockopt_ops' using the nf_register_sockopt() call. The fields of ++this structure are as follows: ++ ++<descrip> ++<tag>list</tag> Used to sew it into the linked list: set to '{ NULL, ++NULL }'. ++ ++<tag>pf</tag> The protocol family you handle, eg. PF_INET. ++ ++<tag>set_optmin</tag> and ++<tag>set_optmax</tag> ++ ++These specify the (exclusive) range of setsockopt numbers handled. ++Hence using 0 and 0 means you have no setsockopt numbers. ++ ++<tag>set</tag> This is the function called when the user calls one of ++your setsockopts. You should check that they have NET_ADMIN ++capability within this function. ++ ++<tag>get_optmin</tag> and ++<tag>get_optmax</tag> ++ ++These specify the (exclusive) range of getsockopt numbers handled. ++Hence using 0 and 0 means you have no getsockopt numbers. 
++ ++<tag>get</tag> This is the function called when the user calls one of ++your getsockopts. You should check that they have NET_ADMIN ++capability within this function. ++</descrip> ++ ++<p>The final two fields are used internally. ++ ++<sect1>Packet Handling in Userspace ++ ++<p>Using the libipq library and the `ip_queue' module, almost anything ++which can be done inside the kernel can now be done in userspace. ++This means that, with some speed penalty, you can develop your code ++entirely in userspace. Unless you are trying to filter large ++bandwidths, you should find this approach superior to in-kernel packet ++mangling. ++ ++<p>In the very early days of netfilter, I proved this by porting an ++embryonic version of iptables to userspace. Netfilter opens the doors ++for more people to write their own, fairly efficient netmangling ++modules, in whatever language they want. ++ ++<sect>Translating 2.0 and 2.2 Packet Filter Modules ++ ++<p>Look at the ip_fw_compat.c file for a simple layer which should ++make porting quite simple. ++ ++<sect>Netfilter Hooks for Tunnel Writers ++ ++<p>Authors of tunnel (or encapsulation) drivers should follow two ++simple rules for the 2.4 kernel (as do the drivers inside the kernel, ++like net/ipv4/ipip.c): ++ ++<itemize> ++<item> ++Release skb->nfct if you're going to make the packet unrecognisable ++(ie. decapsulating/encapsulating). You don't need to do this if you ++unwrap it into a *new* skb, but if you're going to do it in place, you ++must do this. ++ ++<p>Otherwise: the NAT code will use the old connection tracking ++information to mangle the packet, with bad consequences. ++ ++<item>Make sure the encapsulated packets go through the LOCAL_OUT ++hook, and decapsulated packets go through the PRE_ROUTING hook (most ++tunnels use ip_rcv(), which does this for you). ++ ++<p>Otherwise: the user will not be able to filter as they expect to with ++tunnels. 
++</itemize> ++ ++<p>The canonical way to do the first is to insert code like the ++following before you wrap or unwrap the packet: ++ ++<tscreen><verb> ++ /* Tell the netfilter framework that this packet is not the ++ same as the one before! */ ++#ifdef CONFIG_NETFILTER ++ nf_conntrack_put(skb->nfct); ++ skb->nfct = NULL; ++#ifdef CONFIG_NETFILTER_DEBUG ++ skb->nf_debug = 0; ++#endif ++#endif ++</verb></tscreen> ++ ++<p>Usually, all you need to do for the second, is to find where the ++newly encapsulated packet goes into "ip_send()", and replace it with ++something like: ++ ++<tscreen><verb> ++ /* Send "new" packet from local host */ ++ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, ip_send); ++</verb></tscreen> ++ ++<p> Following these rules means that the person setting up the packet ++filtering rules on the tunnel box will see something like the ++following sequence for a packet being tunnelled: ++ ++<enum> ++<item> FORWARD hook: normal packet (from eth0 -> tunl0) ++<item> LOCAL_OUT hook: encapsulated packet (to eth1). ++</enum> ++ ++And for the reply packet: ++<enum> ++<item> LOCAL_IN hook: encapsulated reply packet (from eth1) ++<item> FORWARD hook: reply packet (from eth1 -> eth0). ++</enum> ++ ++<sect>The Test Suite ++ ++<p>Within the CVS repository lives a test suite: the more the test ++suite covers, the greater confidence you can have that changes to the ++code haven't quietly broken something. Trivial tests are at least as ++important as tricky tests: it's the trivial tests which simplify the ++complex tests (since you know the basics work fine before the complex ++test gets run). ++ ++<p>The tests are simple: they are just shell scripts under the ++testsuite/ subdirectory which are supposed to succeed. The scripts ++are run in alphabetical order, so `01test' is run before `02test'. ++Currently there are 5 test directories: ++ ++<descrip> ++<tag>00netfilter/</tag> General netfilter framework tests. ++<tag>01iptables/</tag> iptables tests.
++<tag>02conntrack/</tag> connection tracking tests. ++<tag>03NAT/</tag> NAT tests ++<tag>04ipchains-compat/</tag> ipchains/ipfwadm compatibility tests ++</descrip> ++ ++Inside the testsuite/ directory is a script called `test.sh'. It ++configures two dummy interfaces (tap0 and tap1), turns forwarding on, ++and removes all netfilter modules. Then it runs through the ++directories above and runs each of their test.sh scripts until one ++fails. This script takes two optional arguments: `-v' meaning to ++print out each test as it proceeds, and an optional test name: if this ++is given, it will skip over all tests until this one is found. ++ ++<sect1>Writing a Test ++ ++<p>Create a new file in the appropriate directory: try to number your ++test so that it gets run at the right time. For example, in order to ++test ICMP reply tracking (02conntrack/02reply.sh), we need to first ++check that outgoing ICMPs are tracked properly ++(02conntrack/01simple.sh). ++ ++<p>It's usually better to create many small files, each of which ++covers one area, because it helps to isolate problems immediately for ++people running the testsuite. ++ ++<p>If something goes wrong in the test, simply do an `exit 1', which ++causes failure; if it's something you expect may fail, you should ++print a unique message. Your test should end with `exit 0' if ++everything goes OK. You should check the success of <bf>every</bf> ++command, either using `set -e' at the top of the script, or ++appending `|| exit 1' to the end of each command. ++ ++<p>The helper functions `load_module' and `remove_module' can be used ++to load modules: you should never rely on autoloading in the testsuite ++unless that is what you are specifically testing. ++ ++<sect1>Variables And Environment ++ ++<p>You have two play interfaces: tap0 and tap1. Their interface ++addresses are in variables <tt>$TAP0</tt> and <tt>$TAP1</tt> ++respectively. 
They both have netmasks of 255.255.255.0; their ++networks are in $TAP0NET and $TAP1NET respectively. ++ ++<p>There is an empty temporary file in $TMPFILE. It is deleted at the ++end of your test. ++ ++<p>Your script will be run from the testsuite/ directory, wherever it ++is. Hence you should access tools (such as iptables) using path ++starting with `../userspace'. ++ ++<p>Your script can print out more information if $VERBOSE is set ++(meaning that the user specified `-v' on the command line). ++ ++<sect1>Useful Tools ++ ++<p> ++There are several useful testsuite tools in the "tools" subdirectory: ++each one exits with a non-zero exit status if there is a problem. ++ ++<sect2>gen_ip ++ ++<p>You can generate IP packets using `gen_ip', which outputs an IP ++packet to standard output. You can feed packets in the tap0 and tap1 ++by sending standard output to /dev/tap0 and /dev/tap1 (these are ++created upon first running the testsuite if they don't exist). ++ ++<p>gen_ip is a simplistic program which is currently very fussy about ++its argument order. First are the general optional arguments: ++ ++<descrip> ++ ++<tag>FRAG=offset,length</tag> Generate the packet, then turn it into a ++ fragment at the following offset and length. ++ ++<tag>MF</tag> Set the `More Fragments' bit on the packet. ++ ++<tag>MAC=xx:xx:xx:xx:xx:xx</tag> Set the source MAC address on the ++ packet. ++ ++<tag>TOS=tos</tag> Set the TOS field on the packet (0 to 255). ++ ++</descrip> ++ ++Next come the compulsory arguments: ++ ++<descrip> ++<tag>source ip</tag> Source IP address of the packet. ++ ++<tag>dest ip</tag> Destination IP address of the packet. ++ ++<tag>length</tag> Total length of the packet, including headers. ++ ++<tag>protocol</tag> Protocol number of the packet, eg 17 = UDP. ++ ++</descrip> ++ ++Then the arguments depend on the protocol: for UDP (17), they are the ++source and destination port numbers.
For ICMP (1), they are the type ++and code of the ICMP message: if the type is 0 or 8 (ping-reply or ++ping), then two additional arguments (the ID and sequence fields) are ++required. For TCP, the source and destination ports, and flags ++("SYN", "SYN/ACK", "ACK", "RST" or "FIN") are required. There are ++three optional arguments: "OPT=" followed by a comma-separated list of ++options, "SYN=" followed by a sequence number, and "ACK=" followed by ++a sequence number. Finally, the optional argument "DATA" indicates ++that the payload of the TCP packet is to be filled with the contents ++of standard input. ++ ++<sect2>rcv_ip ++ ++<p>You can see IP packets using `rcv_ip', which prints out the command ++line as close as possible to the original value fed to gen_ip ++(fragments are the exception). ++ ++<p>This is extremely useful for analyzing packets. It takes two ++compulsory arguments: ++ ++<descrip> ++<tag>wait time</tag> The maximum time in seconds to wait for a packet ++ from standard input. ++ ++<tag>iterations</tag> The number of packets to receive. ++</descrip> ++ ++There is one optional argument, "DATA", which causes the payload of a ++TCP packet to be printed on standard output after the packet header. ++ ++<p>The standard way to use `rcv_ip' in a shell script is as follows: ++ ++<verb> ++# Set up job control, so we can use & in shell scripts. ++set -m ++ ++# Wait two seconds for one packet from tap0 ++../tools/rcv_ip 2 1 < /dev/tap0 > $TMPFILE & ++ ++# Make sure that rcv_ip has started running. ++sleep 1 ++ ++# Send a ping packet ++../tools/gen_ip $TAP1NET.2 $TAP0NET.2 100 1 8 0 55 57 > /dev/tap1 || exit 1 ++ ++# Wait for rcv_ip, ++if wait %../tools/rcv_ip; then : ++else ++ echo rcv_ip failed: ++ cat $TMPFILE ++ exit 1 ++fi ++</verb> ++ ++<sect2>gen_err ++ ++<p>This program takes a packet (as generated by gen_ip, for example) ++on standard input, and turns it into an ICMP error. ++ ++<p>It takes three arguments: a source IP address, a type and a code. 
++The destination IP address will be set to the source IP address of the ++packet fed in standard input. ++ ++<sect2>local_ip ++ ++<p>This takes a packet from standard input and injects it into the ++system from a raw socket. This gives the appearance of a ++locally-generated packet (as separate from feeding a packet in one of ++the ethertap devices, which looks like a remotely-generated packet). ++ ++<sect1>Random Advice ++ ++<p>All the tools assume they can do everything in one read or write: ++this is true for the ethertap devices, but might not be true if you're ++doing something tricky with pipes. ++ ++<p>dd can be used to cut packets: dd has an obs (output block size) ++option which can be used to make it output the packet in a single ++write. ++ ++<p>Test for success first: eg. testing that packets are successfully ++blocked. First test that packets pass through normally, <bf>then</bf> ++test that some packets are blocked. Otherwise an unrelated failure ++could be stopping the packets... ++ ++<p>Try to write exact tests, not `throw random stuff and see what ++happens' tests. If an exact test goes wrong, it's a useful thing to ++know. If a random test goes wrong once, it doesn't help much. ++ ++<p>If a test fails without a message, you can add `-x' to the top line ++of the script (ie. `#! /bin/sh -x') to see what commands it's running. ++ ++<p>If a test fails randomly, check for random network traffic ++interfering (try downing all your external interfaces). Sitting on ++the same network as Andrew Tridgell, I tend to get plagued by Windows ++broadcasts, for example. ++ ++<sect>Motivation ++ ++<p>As I was developing ipchains, I realized (in one of those ++blinding-flash-while-waiting-for-entree moments in a Chinese ++restaurant in Sydney) that packet filtering was being done in the ++wrong place. I can't find it now, but I remember sending mail to Alan ++Cox, who kind of said `why don't you finish what you're doing, first, ++even though you're probably right'.
In the short term, pragmatism was ++to win over The Right Thing. ++ ++<p>After I finished ipchains, which was initially going to be a minor ++modification of the kernel part of ipfwadm, and turned into a larger ++rewrite, and wrote the HOWTO, I became aware of just how much ++confusion there is in the wider Linux community about issues like ++packet filtering, masquerading, port forwarding and the like. ++ ++<p>This is the joy of doing your own support: you get a closer feel ++for what the users are trying to do, and what they are struggling ++with. Free software is most rewarding when it's in the hands of the ++most users (that's the point, right?), and that means making it easy. ++The architecture, not the documentation, was the key flaw. ++ ++<p>So I had the experience, with the ipchains code, and a good idea of ++what people out there were doing. There were only two problems. ++ ++<p>Firstly, I didn't want to get back into security. Being a security ++consultant is a constant moral tug-of-war between your conscience and ++your wallet. At a fundamental level, you are selling the feeling of ++security, which is at odds with actual security. Maybe working in a ++military setting, where they understand security, it'd be different. ++ ++<p>The second problem is that newbie users aren't the only concern; an ++increasing number of large companies and ISPs are using this stuff. I ++needed reliable input from that class of users if it was to scale to ++tomorrow's home users. ++ ++<p>These problems were resolved, when I ran into David Bonn, of ++WatchGuard fame, at Usenix in July 1998. They were looking for a ++Linux kernel coder; in the end we agreed that I'd head across to their ++Seattle offices for a month and we'd see if we could hammer out an ++agreement whereby they'd sponsor my new code, and my current support ++efforts. The rate we agreed on was more than I asked, so I didn't ++take a pay cut. 
This means I don't have to even think about external ++conslutting for a while. ++ ++<p>Exposure to WatchGuard gave me exposure to the large clients I ++need, and being independent from them allowed me to support all users ++(eg. WatchGuard competitors) equally. ++ ++<p>So I could have simply written netfilter, ported ipchains over the ++top, and been done with it. Unfortunately, that would leave all the ++masquerading code in the kernel: making masquerading independent from ++filtering is one of the major wins of moving the packet ++filtering points, but to do that masquerading also needed to be moved ++over to the netfilter framework as well. ++ ++<p>Also, my experience with ipfwadm's `interface-address' feature (the ++one I removed in ipchains) had taught me that there was no hope of ++simply ripping out the masquerading code and expecting someone who ++needed it to do the work of porting it onto netfilter for me. ++ ++<p>So I needed to have at least as many features as the current code; ++preferably a few more, to encourage niche users to become early ++adopters. This means replacing transparent proxying (gladly!), ++masquerading and port forwarding. In other words, a complete NAT layer. ++ ++<p>Even if I had decided to port the existing masquerading layer, ++instead of writing a generic NAT system, the masquerading code was ++showing its age, and lack of maintenance. See, there was no ++masquerading maintainer, and it shows. It seems that serious users ++generally don't use masquerading, and there aren't many home users up ++to the task of doing maintenance. Brave people like Juan Ciarlante ++were doing fixes, but it had reached the stage (being extended over ++and over) that a rewrite was needed. ++ ++<p>Please note that I wasn't the person to do a NAT rewrite: I didn't ++use masquerading any more, and I'd not studied the existing code at ++the time. That's probably why it took me longer than it should have.
++But the result is fairly good, in my opinion, and I sure as hell ++learned a lot. No doubt the second version will be even better, once ++we see how people use it. ++ ++<sect>Thanks ++ ++<p>Thanks to those who helped, especially Harald Welte for writing the ++Protocol Helpers section. ++</article> +Index: iptables-1.4.12/howtos/packet-filtering-HOWTO.sgml +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ iptables-1.4.12/howtos/packet-filtering-HOWTO.sgml 2011-11-07 13:57:14.000000000 -0600 +@@ -0,0 +1,1339 @@ ++<!doctype linuxdoc system> ++ ++<!-- This is the Linux Packet Filtering HOWTO. ++ --> ++ ++<!-- $Id: packet-filtering-HOWTO.sgml,v 1.26 2002/01/24 13:42:53 laforge Exp $ --> ++ ++<article> ++ ++<!-- Title information --> ++ ++<title>Linux 2.4 Packet Filtering HOWTO ++<author>Rusty Russell, mailing list <tt>netfilter@lists.samba.org</tt> ++<date>$Revision: 1.26 $ $Date: 2002/01/24 13:42:53 $ ++<abstract> ++This document describes how to use iptables to filter out bad packets ++for the 2.4 Linux kernels. ++</abstract> ++ ++<!-- Table of contents --> ++<toc> ++ ++<!-- Begin the document --> ++ ++<sect>Introduction<label id="intro"> ++ ++<p> ++Welcome, gentle reader. ++ ++<p> ++It is assumed you know what an IP address, a network address, a ++netmask, routing and DNS are. If not, I recommend that you read the ++Network Concepts HOWTO. ++ ++<p> ++This HOWTO flips between a gentle introduction (which will leave you ++feeling warm and fuzzy now, but unprotected in the Real World) and raw ++full-disclosure (which would leave all but the hardiest souls ++confused, paranoid and seeking heavy weaponry). ++ ++<p> ++Your network is not <bf>secure</bf>.
The problem of allowing rapid, ++convenient communication while restricting its use to good, and not ++evil intents is congruent to other intractable problems such as ++allowing free speech while disallowing a call of ``Fire!'' in a ++crowded theater. It will not be solved in the space of this HOWTO. ++ ++<p> ++So only you can decide where the compromise will be. I will try to ++instruct you in the use of some of the tools available and some ++vulnerabilities to be aware of, in the hope that you will use them for ++good, and not evil purposes. Another equivalent problem. ++ ++<p>(C) 2000 Paul `Rusty' Russell. Licenced under the GNU GPL. ++ ++<sect>Where is the official Web Site? Is there a Mailing List? ++ ++<p>There are three official sites: ++<itemize> ++<item>Thanks to <url url="http://netfilter.filewatcher.org/" name="Filewatcher">. ++<item>Thanks to <url url="http://netfilter.samba.org/" name="The Samba Team and SGI">. ++<item>Thanks to <url url="http://netfilter.gnumonks.org/" name="Harald Welte">. ++</itemize> ++<p> You can reach all of them using round-robin DNS via ++<url url="http://www.netfilter.org/"> and <url url="http://www.iptables.org/"> ++ ++<p>For the official netfilter mailing list, see ++<url url="http://www.netfilter.org/contact.html#list" name="netfilter List">. ++ ++<sect>So What's A Packet Filter? ++ ++<p> ++A packet filter is a piece of software which looks at the ++<em>header</em> of packets as they pass through, and decides the fate ++of the entire packet. It might decide to <bf>DROP</bf> the packet ++(i.e., discard the packet as if it had never received it), ++<bf>ACCEPT</bf> the packet (i.e., let the packet go through), or ++something more complicated. ++ ++<p> ++Under Linux, packet filtering is built into the kernel (as a kernel ++module, or built right in), and there are a few trickier things we can ++do with packets, but the general principle of looking at the headers ++and deciding the fate of the packet is still there. 
++ ++<sect1>Why Would I Want to Packet Filter? ++ ++<p> ++Control. Security. Watchfulness. ++ ++<p> ++<descrip> ++<tag/Control:/ when you are using a Linux box to connect your internal ++network to another network (say, the Internet) you have an opportunity ++to allow certain types of traffic, and disallow others. For example, ++the header of a packet contains the destination address of the packet, ++so you can prevent packets going to a certain part of the outside ++network. As another example, I use Netscape to access the Dilbert ++archives. There are advertisements from doubleclick.net on the page, ++and Netscape wastes my time by cheerfully downloading them. ++Telling the packet filter not to allow any packets to or from the ++addresses owned by doubleclick.net solves that problem (there are ++better ways of doing this though: see Junkbuster). ++ ++<tag/Security:/ when your Linux box is the only thing between the ++chaos of the Internet and your nice, orderly network, it's nice to ++know you can restrict what comes tromping in your door. For example, ++you might allow anything to go out from your network, but you might be ++worried about the well-known `Ping of Death' coming in from malicious ++outsiders. As another example, you might not want outsiders ++telnetting to your Linux box, even though all your accounts have ++passwords. Maybe you want (like most people) to be an observer on the ++Internet, and not a server (willing or otherwise). Simply don't let ++anyone connect in, by having the packet filter reject incoming packets ++used to set up connections. ++ ++<tag/Watchfulness:/ sometimes a badly configured machine on the local ++network will decide to spew packets to the outside world. It's nice ++to tell the packet filter to let you know if anything abnormal occurs; ++maybe you can do something about it, or maybe you're just curious by ++nature. 
++</descrip> ++ ++<sect1>How Do I Packet Filter Under Linux?<label id="filter-linux"> ++ ++<p>Linux kernels have had packet filtering since the 1.1 series. The ++first generation, based on ipfw from BSD, was ported by Alan Cox in ++late 1994. This was enhanced by Jos Vos and others for Linux 2.0; the ++userspace tool `ipfwadm' controlled the kernel filtering rules. In ++mid-1998, for Linux 2.2, I reworked the kernel quite heavily, with the ++help of Michael Neuling, and introduced the userspace tool `ipchains'. ++Finally, the fourth-generation tool, `iptables', and another kernel ++rewrite occurred in mid-1999 for Linux 2.4. It is this iptables which ++this HOWTO concentrates on. ++ ++<p> ++You need a kernel which has the netfilter infrastructure in it: ++netfilter is a general framework inside the Linux kernel which other ++things (such as the iptables module) can plug into. This means you ++need kernel 2.3.15 or beyond, and answer `Y' to CONFIG_NETFILTER in ++the kernel configuration. ++ ++<p> ++The tool <tt>iptables</tt> talks to the kernel and tells it what ++packets to filter. Unless you are a programmer, or overly curious, ++this is how you will control the packet filtering. ++ ++<sect2> iptables ++ ++<p> ++The <tt>iptables</tt> tool inserts and deletes rules from the kernel's ++packet filtering table. This means that whatever you set up, it will ++be lost upon reboot; see <ref id="permanent" name="Making Rules ++Permanent"> for how to make sure they are restored the next time Linux ++is booted. ++ ++<p> ++<tt>iptables</tt> is a replacement for <tt>ipfwadm</tt> and ++<tt>ipchains</tt>: see ++<ref id="oldstyle" name="Using ipchains and ipfwadm"> for how to painlessly ++avoid using iptables if you're using one of those tools. ++ ++<sect2> Making Rules Permanent<label id="permanent"> ++ ++<p>Your current firewall setup is stored in the kernel, and thus will ++be lost on reboot. 
You can try the iptables-save and iptables-restore ++scripts to save them to, and restore them from a file. ++ ++<p>The other way is to put the commands required to set up your rules ++in an initialization script. Make sure you do something intelligent ++if one of the commands should fail (usually `exec /sbin/sulogin'). ++ ++<sect>Who the hell are you, and why are you playing with my kernel? ++ ++<p> ++I'm Rusty Russell; the Linux IP Firewall maintainer and just another ++working coder who happened to be in the right place at the right time. ++I wrote ipchains (see <ref id="filter-linux" name="How Do I Packet ++Filter Under Linux?"> above for due credit to the people who did the ++actual work), and learnt enough to get packet filtering right this ++time. I hope. ++ ++<p> ++<url url="http://www.watchguard.com" name="WatchGuard">, an excellent ++firewall company who sell the really nice plug-in Firebox, offered to ++pay me to do nothing, so I could spend all my time writing this stuff, ++and maintaining my previous stuff. I predicted 6 months, and it took ++12, but I felt by the end that it had been done Right. Many rewrites, ++a hard-drive crash, a laptop being stolen, a couple of corrupted ++filesystems and one broken screen later, here it is. ++ ++<p> ++While I'm here, I want to clear up some people's misconceptions: I am ++no kernel guru. I know this, because my kernel work has brought me ++into contact with some of them: David S. Miller, Alexey Kuznetsov, ++Andi Kleen, Alan Cox. However, they're all busy doing the deep magic, ++leaving me to wade in the shallow end where it's safe. ++ ++<!-- This is probably no longer true; somewhere in writing all this ++kernel code and documentation I seem to have picked up a fair number ++of kernel tricks. But I'm still nowhere near as clever as I think I ++am. 
--> ++ ++<sect> Rusty's Really Quick Guide To Packet Filtering ++ ++<p> ++Most people just have a single PPP connection to the Internet, and ++don't want anyone coming back into their network, or the firewall: ++ ++<tscreen><verb> ++## Insert connection-tracking modules (not needed if built into kernel). ++# insmod ip_conntrack ++# insmod ip_conntrack_ftp ++ ++## Create chain which blocks new connections, except if coming from inside. ++# iptables -N block ++# iptables -A block -m state --state ESTABLISHED,RELATED -j ACCEPT ++# iptables -A block -m state --state NEW -i ! ppp0 -j ACCEPT ++# iptables -A block -j DROP ++ ++## Jump to that chain from INPUT and FORWARD chains. ++# iptables -A INPUT -j block ++# iptables -A FORWARD -j block ++</verb></tscreen> ++ ++<sect> How Packets Traverse The Filters ++ ++<p> ++The kernel starts with three lists of rules in the `filter' table; ++these lists are called <bf>firewall chains</bf> or just ++<bf>chains</bf>. The three chains are called <bf>INPUT</bf>, ++<bf>OUTPUT</bf> and <bf>FORWARD</bf>. ++ ++<p> ++For ASCII-art fans, the chains are arranged like so: <bf>(Note: this ++is a very different arrangement from the 2.0 and 2.2 kernels!)</bf> ++ ++<verb> ++ _____ ++Incoming / \ Outgoing ++ -->[Routing ]--->|FORWARD|-------> ++ [Decision] \_____/ ^ ++ | | ++ v ____ ++ ___ / \ ++ / \ |OUTPUT| ++ |INPUT| \____/ ++ \___/ ^ ++ | | ++ ----> Local Process ---- ++</verb> ++ ++<p>The three circles represent the three chains mentioned above. When ++a packet reaches a circle in the diagram, that chain is examined to ++decide the fate of the packet. If the chain says to DROP the packet, ++it is killed there, but if the chain says to ACCEPT the packet, it ++continues traversing the diagram. ++ ++<p> ++A chain is a checklist of <bf>rules</bf>. Each rule says `if the packet ++header looks like this, then here's what to do with the packet'. If ++the rule doesn't match the packet, then the next rule in the chain is ++consulted. 
Finally, if there are no more rules to consult, then the ++kernel looks at the chain <bf>policy</bf> to decide what to do. In a ++security-conscious system, this policy usually tells the kernel to ++DROP the packet. ++ ++<p> ++<enum> ++<item>When a packet comes in (say, through the Ethernet card) the kernel ++first looks at the destination of the packet: this is called ++`routing'. ++ ++<item>If it's destined for this box, the packet passes downwards ++in the diagram, to the INPUT chain. If it passes this, any processes ++waiting for that packet will receive it. ++ ++<item>Otherwise, if the kernel does not have forwarding enabled, or it ++doesn't know how to forward the packet, the packet is dropped. If ++forwarding is enabled, and the packet is destined for another network ++interface (if you have another one), then the packet goes rightwards ++on our diagram to the FORWARD chain. If it is ACCEPTed, it will be ++sent out. ++ ++<item>Finally, a program running on the box can send network packets. ++These packets pass through the OUTPUT chain immediately: if it says ++ACCEPT, then the packet continues out to whatever interface it is ++destined for. ++</enum> ++ ++<sect>Using iptables ++ ++<p> ++iptables has a fairly detailed manual page (<tt>man iptables</tt>), ++if you need more detail on particulars. Those of you familiar ++with ipchains may simply want to look at <ref id="Appendix-A" ++name="Differences Between iptables and ipchains">; they are very ++similar. ++ ++<p> ++There are several different things you can do with <tt>iptables</tt>. ++You start with three built-in chains <tt>INPUT</tt>, <tt>OUTPUT</tt> ++and <tt>FORWARD</tt> which you can't delete. Let's look at the ++operations to manage whole chains: ++ ++<enum> ++<item> Create a new chain (-N). ++<item> Delete an empty chain (-X). ++<item> Change the policy for a built-in chain (-P). ++<item> List the rules in a chain (-L). ++<item> Flush the rules out of a chain (-F).
++<item> Zero the packet and byte counters on all rules in a chain (-Z). ++</enum> ++ ++There are several ways to manipulate rules inside a chain: ++ ++<enum> ++<item> Append a new rule to a chain (-A). ++<item> Insert a new rule at some position in a chain (-I). ++<item> Replace a rule at some position in a chain (-R). ++<item> Delete a rule at some position in a chain, or the first that matches (-D). ++</enum> ++ ++<sect1> What You'll See When Your Computer Starts Up ++ ++<p> ++iptables may be a module, called (`iptable_filter.o'), which should be ++automatically loaded when you first run <tt>iptables</tt>. It can ++also be built into the kernel permanently. ++ ++<p>Before any iptables commands have been run (be careful: some ++distributions will run iptables in their initialization scripts), ++there will be no rules in any of the built-in chains (`INPUT', ++`FORWARD' and `OUTPUT'), and all the chains will have a policy of ACCEPT. ++You can alter the default policy of the FORWARD chain by providing the ++`forward=0' option to the iptable_filter module. ++ ++<sect1> Operations on a Single Rule ++ ++<p> ++This is the bread-and-butter of packet filtering; manipulating rules. ++Most commonly, you will probably use the append (-A) and delete (-D) ++commands. The others (-I for insert and -R for replace) are simple ++extensions of these concepts. ++ ++<p> ++Each rule specifies a set of conditions the packet must meet, and what ++to do if it meets them (a `target'). For example, you might want to ++drop all ICMP packets coming from the IP address 127.0.0.1. So in ++this case our conditions are that the protocol must be ICMP and that ++the source address must be 127.0.0.1. Our target is `DROP'. ++ ++<p> ++127.0.0.1 is the `loopback' interface, which you will have even if you ++have no real network connection.
You can use the `ping' program to ++generate such packets (it simply sends an ICMP type 8 (echo request) ++which all cooperative hosts should obligingly respond to with an ICMP ++type 0 (echo reply) packet). This makes it useful for testing. ++ ++<tscreen><verb> ++# ping -c 1 127.0.0.1 ++PING 127.0.0.1 (127.0.0.1): 56 data bytes ++64 bytes from 127.0.0.1: icmp_seq=0 ttl=64 time=0.2 ms ++ ++--- 127.0.0.1 ping statistics --- ++1 packets transmitted, 1 packets received, 0% packet loss ++round-trip min/avg/max = 0.2/0.2/0.2 ms ++# iptables -A INPUT -s 127.0.0.1 -p icmp -j DROP ++# ping -c 1 127.0.0.1 ++PING 127.0.0.1 (127.0.0.1): 56 data bytes ++ ++--- 127.0.0.1 ping statistics --- ++1 packets transmitted, 0 packets received, 100% packet loss ++# ++</verb></tscreen> ++ ++You can see here that the first ping succeeds (the `-c 1' tells ping ++to only send a single packet). ++ ++<p> ++Then we append (-A) to the `INPUT' chain, a rule specifying that for ++packets from 127.0.0.1 (`-s 127.0.0.1') with protocol ICMP (`-p icmp') ++we should jump to DROP (`-j DROP'). ++ ++<p> ++Then we test our rule, using the second ping. There will be a pause ++before the program gives up waiting for a response that will never ++come. ++ ++<p> ++We can delete the rule in one of two ways. Firstly, since we know ++that it is the only rule in the input chain, we can use a numbered ++delete, as in: ++<tscreen><verb> ++ # iptables -D INPUT 1 ++ # ++</verb></tscreen> ++To delete rule number 1 in the INPUT chain. ++ ++<p> ++The second way is to mirror the -A command, but replacing the -A with ++-D. This is useful when you have a complex chain of rules and you ++don't want to have to count them to figure out that it's rule 37 that ++you want to get rid of. In this case, we would use: ++<tscreen><verb> ++ # iptables -D INPUT -s 127.0.0.1 -p icmp -j DROP ++ # ++</verb></tscreen> ++The syntax of -D must have exactly the same options as the -A (or -I ++or -R) command. 
If there are multiple identical rules in the same ++chain, only the first will be deleted. ++ ++<sect1>Filtering Specifications ++ ++<p> ++We have seen the use of `-p' to specify protocol, and `-s' to specify ++source address, but there are other options we can use to specify ++packet characteristics. What follows is an exhaustive compendium. ++ ++<sect2>Specifying Source and Destination IP Addresses ++ ++<p> ++Source (`-s', `--source' or `--src') and destination (`-d', ++`--destination' or `--dst') IP addresses can be specified in four ++ways. The most common way is to use the full name, such as ++`localhost' or `www.linuxhq.com'. The second way is to specify the IP ++address such as `127.0.0.1'. ++ ++<p> ++The third and fourth ways allow specification of a group of IP ++addresses, such as `199.95.207.0/24' or `199.95.207.0/255.255.255.0'. ++These both specify any IP address from 199.95.207.0 to 199.95.207.255 ++inclusive; the digits after the `/' tell which parts of the IP address ++are significant. `/32' or `/255.255.255.255' is the default (match ++all of the IP address). To specify any IP address at all `/0' can be ++used, like so: ++<tscreen><verb> ++ [ NOTE: `-s 0/0' is redundant here. ] ++ # iptables -A INPUT -s 0/0 -j DROP ++ # ++</verb></tscreen> ++ ++This is rarely used, as the effect above is the same as not specifying ++the `-s' option at all. ++ ++<sect2>Specifying Inversion ++ ++<p> ++Many flags, including the `-s' (or `--source') and `-d' ++(`--destination') flags can have their arguments preceded by `!' ++(pronounced `not') to match addresses NOT equal to the ones given. ++For example, `-s ! localhost' matches any packet <bf>not</bf> coming ++from localhost. ++ ++<sect2>Specifying Protocol ++ ++<p> ++The protocol can be specified with the `-p' (or `--protocol') flag. ++Protocol can be a number (if you know the numeric protocol values for ++IP) or a name for the special cases of `TCP', `UDP' or `ICMP'.
Case ++doesn't matter, so `tcp' works as well as `TCP'. ++ ++<p> ++The protocol name can be prefixed by a `!', to invert it, such as `-p ++! TCP' to specify packets which are <bf>not</bf> TCP. ++ ++<sect2>Specifying an Interface ++ ++<p> ++The `-i' (or `--in-interface') and `-o' (or `--out-interface') options ++specify the name of an <bf>interface</bf> to match. An interface is ++the physical device the packet came in on (`-i') or is going out on ++(`-o'). You can use the <tt>ifconfig</tt> command to list the ++interfaces which are `up' (i.e., working at the moment). ++ ++<p> ++Packets traversing the <tt>INPUT</tt> chain don't have an output ++interface, so any rule using `-o' in this chain will never match. ++Similarly, packets traversing the <tt>OUTPUT</tt> chain don't have an ++input interface, so any rule using `-i' in this chain will never match. ++ ++<p>Only packets traversing the <tt>FORWARD</tt> chain have both an ++input and output interface. ++ ++<p> ++It is perfectly legal to specify an interface that currently does not ++exist; the rule will not match anything until the interface comes up. ++This is extremely useful for dial-up PPP links (usually interface ++<tt>ppp0</tt>) and the like. ++ ++<p> ++As a special case, an interface name ending with a `+' will match all ++interfaces (whether they currently exist or not) which begin with that ++string. For example, to specify a rule which matches all PPP ++interfaces, the <tt>-i ppp+</tt> option would be used. ++ ++<p> ++The interface name can be preceded by a `!' with spaces around it, to ++match a packet which does <bf>not</bf> match the specified ++interface(s), eg <tt>-i ! ppp+</tt>. ++ ++<sect2>Specifying Fragments ++ ++<p> ++Sometimes a packet is too large to fit down a wire all at once. When ++this happens, the packet is divided into <bf>fragments</bf>, and sent ++as multiple packets. The other end reassembles these fragments to ++reconstruct the whole packet. 
++ ++<p> ++The problem with fragments is that the initial fragment has the ++complete header fields (IP + TCP, UDP and ICMP) to examine, but ++subsequent packets only have a subset of the headers (IP without the ++additional protocol fields). Thus looking inside subsequent fragments ++for protocol headers (such as is done by the TCP, UDP and ICMP ++extensions) is not possible. ++ ++<p> ++If you are doing connection tracking or NAT, then all fragments will ++get merged back together before they reach the packet filtering code, ++so you need never worry about fragments. ++ ++<p> ++Please also note that the INPUT chain of the filter table (or any other ++table hooking into the NF_IP_LOCAL_IN hook) is traversed after ++defragmentation by the core IP stack. ++ ++<p> ++Otherwise, it is important to understand how fragments get treated by ++the filtering rules. Any filtering rule that asks for information we ++don't have will <em>not</em> match. This means that the first fragment is ++treated like any other packet. Second and further fragments won't be. ++Thus a rule <tt>-p TCP --sport www</tt> (specifying a source port of ++`www') will never match a fragment (other than the first fragment). ++Neither will the opposite rule <tt>-p TCP --sport ! www</tt>. ++ ++<p> ++However, you can specify a rule specifically for second and further ++fragments, using the `-f' (or `--fragment') flag. It is also legal to ++specify that a rule does <em>not</em> apply to second and further ++fragments, by preceding the `-f' with ` ! '. ++ ++<p> ++Usually it is regarded as safe to let second and further fragments ++through, since filtering will affect the first fragment, and thus ++prevent reassembly on the target host; however, bugs have been known ++to allow crashing of machines simply by sending fragments. Your call.
++ ++<p> ++Note for network-heads: malformed packets (TCP, UDP and ICMP packets ++too short for the firewalling code to read the ports or ICMP code and ++type) are dropped when such examinations are attempted. So are TCP ++fragments starting at position 8. ++ ++<p> ++As an example, the following rule will drop any fragments going to ++192.168.1.1: ++ ++<tscreen><verb> ++# iptables -A OUTPUT -f -d 192.168.1.1 -j DROP ++# ++</verb></tscreen> ++ ++<sect2>Extensions to iptables: New Matches ++ ++<p><tt>iptables</tt> is <bf>extensible</bf>, meaning that both the ++kernel and the iptables tool can be extended to provide new features. ++ ++<p>Some of these extensions are standard, and others are more exotic. ++Extensions can be made by other people and distributed separately for ++niche users. ++ ++<p>Kernel extensions normally live in the kernel module subdirectory, ++such as /lib/modules/2.4.0-test10/kernel/net/ipv4/netfilter. They are demand loaded if your ++kernel was compiled with CONFIG_KMOD set, so you should not need to ++manually insert them. ++ ++<p>Extensions to the iptables program are shared libraries which ++usually live in /usr/local/lib/, although a distribution ++would put them in /lib/iptables or /usr/lib/iptables. ++ ++<p>Extensions come in two types: new targets, and new matches (we'll ++talk about new targets a little later). Some protocols automatically ++offer new tests: currently these are TCP, UDP and ICMP as shown below. ++ ++<p>For these you will be able to specify the new tests on the command ++line after the `-p' option, which will load the extension. For ++explicit new tests, use the `-m' option to load the extension, after ++which the extended options will be available.
++ ++<p>To get help on an extension, use the option to load it (`-p', `-j' or ++`-m') followed by `-h' or `--help', eg: ++<tscreen><verb> ++# iptables -p tcp --help ++# ++</verb></tscreen> ++ ++<sect3>TCP Extensions ++ ++<p> ++The TCP extensions are automatically loaded if `-p tcp' is specified. ++It provides the following options (none of which match fragments). ++ ++<p> ++<descrip> ++<tag>--tcp-flags</tag> Followed by an optional `!', then two strings ++of flags, allows you to filter on specific TCP flags. The first ++string of flags is the mask: a list of flags you want to examine. The ++second string of flags tells which one(s) should be set. For example, ++ ++<tscreen><verb> ++# iptables -A INPUT --protocol tcp --tcp-flags ALL SYN,ACK -j DROP ++</verb></tscreen> ++ ++This indicates that all flags should be examined (`ALL' is synonymous ++with `SYN,ACK,FIN,RST,URG,PSH'), but only SYN and ACK should be set. ++There is also an argument `NONE' meaning no flags. ++ ++<tag>--syn</tag> Optionally preceded by a `!', this is shorthand ++ for `--tcp-flags SYN,RST,ACK SYN'. ++ ++<tag>--source-port</tag> followed by an optional `!', then either a ++single TCP port, or a range of ports. Ports can be port names, as ++listed in /etc/services, or numeric. Ranges are either two port names ++separated by a `:', or (to specify greater than or equal to a given ++port) a port with a `:' appended, or (to specify less than or equal to ++a given port), a port preceded by a `:'. ++ ++<tag>--sport</tag> is synonymous with `--source-port'. ++ ++<tag>--destination-port</tag> and <tag>--dport</tag> are the same as ++above, only they specify the destination, rather than source, port to ++match. ++ ++<tag>--tcp-option</tag> followed by an optional `!' and a number, ++matches a packet with a TCP option equaling that number. A packet ++which does not have a complete TCP header is dropped automatically if ++an attempt is made to examine its TCP options. 
++</descrip> ++ ++<sect4>An Explanation of TCP Flags ++ ++<p> ++It is sometimes useful to allow TCP connections in one direction, but ++not the other. For example, you might want to allow connections to an ++external WWW server, but not connections from that server. ++ ++<p> ++The naive approach would be to block TCP packets coming from the ++server. Unfortunately, TCP connections require packets going in both ++directions to work at all. ++ ++<p> ++The solution is to block only the packets used to request a ++connection. These packets are called <bf>SYN</bf> packets (ok, ++technically they're packets with the SYN flag set, and the RST and ACK ++flags cleared, but we call them SYN packets for short). By ++disallowing only these packets, we can stop attempted connections in ++their tracks. ++ ++<p> ++The `--syn' flag is used for this: it is only valid for rules which ++specify TCP as their protocol. For example, to specify TCP connection ++attempts from 192.168.1.1: ++<tscreen><verb> ++-p TCP -s 192.168.1.1 --syn ++</verb></tscreen> ++ ++<p> ++This flag can be inverted by preceding it with a `!', which means ++every packet other than the connection initiation. ++ ++<sect3>UDP Extensions ++ ++<p> ++These extensions are automatically loaded if `-p udp' is specified. ++It provides the options `--source-port', `--sport', ++`--destination-port' and `--dport' as detailed for TCP above. ++ ++<sect3>ICMP Extensions ++ ++<p> ++This extension is automatically loaded if `-p icmp' is specified. It ++provides only one new option: ++ ++<p> ++<descrip> ++<tag>--icmp-type</tag> followed by an optional `!', then either an ++icmp type name (eg `host-unreachable'), or a numeric type (eg. `3'), ++or a numeric type and code separated by a `/' (eg. `3/3'). A list ++of available icmp type names is given using `-p icmp --help'. 
++</descrip> ++ ++<sect3>Other Match Extensions ++ ++<p> ++The other extensions in the netfilter package are demonstration ++extensions, which (if installed) can be invoked with the `-m' option. ++ ++<descrip> ++<tag>mac</tag> This module must be explicitly specified with `-m mac' ++or `--match mac'. It is used for matching incoming packet's source ++Ethernet (MAC) address, and thus only useful for packets traversing ++the PREROUTING and INPUT chains. It provides only one option: ++ ++ <descrip> ++ <tag>--mac-source</tag> followed by an optional `!', then an ++ ethernet address in colon-separated hexbyte notation, eg ++ `--mac-source 00:60:08:91:CC:B7'. ++ </descrip> ++ ++<tag>limit</tag> This module must be explicitly specified with `-m ++limit' or `--match limit'. It is used to restrict the rate of ++matches, such as for suppressing log messages. It will only match a ++given number of times per second (by default 3 matches per hour, ++with a burst of 5). It takes two optional arguments: ++ ++ <descrip> ++ <tag>--limit</tag> followed by a number; specifies the maximum ++ average number of matches to allow per second. The number can ++ specify units explicitly, using `/second', `/minute', `/hour' or ++ `/day', or parts of them (so `5/second' is the same as `5/s'). ++ ++ <tag>--limit-burst</tag> followed by a number, indicating the ++ maximum burst before the above limit kicks in. ++ </descrip> ++ ++This match can often be used with the LOG target to do rate-limited ++logging. To understand how it works, let's look at the following ++rule, which logs packets with the default limit parameters: ++ ++<tscreen><verb> ++# iptables -A FORWARD -m limit -j LOG ++</verb></tscreen> ++ ++The first time this rule is reached, the packet will be logged; in ++fact, since the default burst is 5, the first five packets will be ++logged. After this, it will be twenty minutes before a packet will be ++logged from this rule, regardless of how many packets reach it. 
Also, ++every twenty minutes which passes without matching a packet, one of ++the burst will be regained; if no packets hit the rule for 100 ++minutes, the burst will be fully recharged; back where we started. ++ ++<p>Note: you cannot currently create a rule with a recharge time ++greater than about 59 hours, so if you set an average rate of one per ++day, then your burst rate must be less than 3. ++ ++<p>You can also use this module to avoid various denial of service ++attacks (DoS) with a faster rate to increase responsiveness. ++ ++<p>Syn-flood protection: ++<tscreen><verb> ++# iptables -A FORWARD -p tcp --syn -m limit --limit 1/s -j ACCEPT ++</verb></tscreen> ++ ++Furtive port scanner: ++<tscreen><verb> ++# iptables -A FORWARD -p tcp --tcp-flags SYN,ACK,FIN,RST RST -m limit --limit 1/s -j ACCEPT ++</verb></tscreen> ++ ++Ping of death: ++<tscreen><verb> ++# iptables -A FORWARD -p icmp --icmp-type echo-request -m limit --limit 1/s -j ACCEPT ++</verb></tscreen> ++ ++This module works like a "hysteresis door", as shown in the graph ++below. ++ ++<tscreen><verb> ++ rate (pkt/s) ++ ^ .---. ++ | / DoS \ ++ | / \ ++Edge of DoS -|.....:.........\....................... ++ = (limit * | /: \ ++limit-burst) | / : \ .-. ++ | / : \ / \ ++ | / : \ / \ ++End of DoS -|/....:..............:.../.......\..../. ++ = limit | : :`-' `--' ++-------------+-----+--------------+------------------> time (s) ++ LOGIC => Match | Didn't Match | Match ++</verb></tscreen> ++ ++Say we say match one packet per second with a five packet ++burst, but packets start coming in at four per second, for three ++seconds, then start again in another three seconds. 
++<tscreen><verb> ++ ++ ++ <--Flood 1--> <---Flood 2---> ++ ++Total ^ Line __-- YNNN ++Packets| Rate __-- YNNN ++ | mum __-- YNNN ++ 10 | Maxi __-- Y ++ | __-- Y ++ | __-- Y ++ | __-- YNNN ++ |- YNNN ++ 5 | Y ++ | Y Key: Y -> Matched Rule ++ | Y N -> Didn't Match Rule ++ | Y ++ |Y ++ 0 +--------------------------------------------------> Time (seconds) ++ 0 1 2 3 4 5 6 7 8 9 10 11 12 ++</verb></tscreen> ++ ++You can see that the first five packets are allowed to exceed the one ++packet per second, then the limiting kicks in. If there is a pause, ++another burst is allowed but not past the maximum rate set by the ++rule (1 packet per second after the burst is used). ++ ++<tag>owner</tag> ++This module attempts to match various characteristics of the packet ++creator, for locally-generated packets. It is only valid in the ++OUTPUT chain, and even then some packets (such as ICMP ping responses) ++may have no owner, and hence never match. ++ ++<descrip> ++ <tag>--uid-owner userid</tag> ++Matches if the packet was created by a process with the given ++effective (numerical) user id. ++ <tag>--gid-owner groupid</tag> ++Matches if the packet was created by a process with the given ++effective (numerical) group id. ++ <tag>--pid-owner processid</tag> ++Matches if the packet was created by a process with the given ++process id. ++ <tag>--sid-owner sessionid</tag> ++Matches if the packet was created by a process in the given session ++group. ++</descrip> ++ ++<tag>unclean</tag> This experimental module must be explicitly ++specified with `-m unclean or `--match unclean'. It does various ++random sanity checks on packets. This module has not been audited, ++and should not be used as a security device (it probably makes things ++worse, since it may well have bugs itself). It provides no options. 
++</descrip> ++ ++<sect3>The State Match ++ ++<p>The most useful match criterion is supplied by the `state' ++extension, which interprets the connection-tracking analysis of the ++`ip_conntrack' module. This is highly recommended. ++ ++<p>Specifying `-m state' allows an additional `--state' option, which ++is a comma-separated list of states to match (the `!' flag indicates ++<bf>not</bf> to match those states). These states are: ++ ++<descrip> ++<tag>NEW</tag> A packet which creates a new connection. ++ ++<tag>ESTABLISHED</tag> A packet which belongs to an existing ++connection (i.e., a reply packet, or outgoing packet on a connection ++which has seen replies). ++ ++<tag>RELATED</tag> A packet which is related to, but not part of, an ++existing connection, such as an ICMP error, or (with the FTP module ++inserted), a packet establishing an ftp data connection. ++ ++<tag>INVALID</tag> A packet which could not be identified for some ++reason: this includes running out of memory and ICMP errors which ++don't correspond to any known connection. Generally these packets ++should be dropped. ++</descrip> ++ ++An example of this powerful match extension would be: ++<tscreen><verb> ++# iptables -A FORWARD -i ppp0 -m state ! --state NEW -j DROP ++</verb></tscreen> ++ ++<sect1>Target Specifications ++ ++<p>Now we know what examinations we can do on a packet, we need a way ++of saying what to do to the packets which match our tests. This is ++called a rule's <bf>target</bf>. ++ ++<p>There are two very simple built-in targets: DROP and ACCEPT. We've ++already met them. If a rule matches a packet and its target is one of ++these two, no further rules are consulted: the packet's fate has been ++decided. ++ ++<p>There are two types of targets other than the built-in ones: ++extensions and user-defined chains. 
++ ++<sect2>User-defined chains ++ ++<p> ++One powerful feature which <tt>iptables</tt> inherits from ++<tt>ipchains</tt> is the ability for the user to create new chains, in ++addition to the three built-in ones (INPUT, FORWARD and OUTPUT). By ++convention, user-defined chains are lower-case to distinguish them ++(we'll describe how to create new user-defined chains below in <ref ++id="chain-ops" name="Operations on an Entire Chain">). ++ ++<p> ++When a packet matches a rule whose target is a user-defined chain, the ++packet begins traversing the rules in that user-defined chain. If ++that chain doesn't decide the fate of the packet, then once traversal ++on that chain has finished, traversal resumes on the next rule in the ++current chain. ++ ++<p> ++Time for more ASCII art. Consider two (silly) chains: <tt>INPUT</tt> (the ++built-in chain) and <tt>test</tt> (a user-defined chain). ++ ++<tscreen><verb> ++ `INPUT' `test' ++ ---------------------------- ---------------------------- ++ | Rule1: -p ICMP -j DROP | | Rule1: -s 192.168.1.1 | ++ |--------------------------| |--------------------------| ++ | Rule2: -p TCP -j test | | Rule2: -d 192.168.1.1 | ++ |--------------------------| ---------------------------- ++ | Rule3: -p UDP -j DROP | ++ ---------------------------- ++</verb></tscreen> ++ ++<p> ++Consider a TCP packet coming from 192.168.1.1, going to 1.2.3.4. It ++enters the <tt>INPUT</tt> chain, and gets tested against Rule1 - no match. ++Rule2 matches, and its target is <tt>test</tt>, so the next rule examined ++is the start of <tt>test</tt>. Rule1 in <tt>test</tt> matches, but doesn't ++specify a target, so the next rule is examined, Rule2. This doesn't ++match, so we have reached the end of the chain. We return to the ++<tt>INPUT</tt> chain, where we had just examined Rule2, so we now examine ++Rule3, which doesn't match either. 
++ ++<p> ++So the packet path is: ++<tscreen><verb> ++ v __________________________ ++ `INPUT' | / `test' v ++ ------------------------|--/ -----------------------|---- ++ | Rule1 | /| | Rule1 | | ++ |-----------------------|/-| |----------------------|---| ++ | Rule2 / | | Rule2 | | ++ |--------------------------| -----------------------v---- ++ | Rule3 /--+___________________________/ ++ ------------------------|--- ++ v ++</verb></tscreen> ++ ++<p>User-defined chains can jump to other user-defined chains (but ++don't make loops: your packets will be dropped if they're found to ++be in a loop). ++ ++<sect2>Extensions to iptables: New Targets ++ ++<p>The other type of extension is a target. A target extension ++consists of a kernel module, and an optional extension to ++<tt>iptables</tt> to provide new command line options. There are ++several extensions in the default netfilter distribution: ++ ++<descrip> ++<tag>LOG</tag> This module provides kernel logging of matching ++packets. It provides these additional options: ++ <descrip> ++ <tag>--log-level</tag> Followed by a level number or name. Valid ++ names are (case-insensitive) `debug', `info', `notice', `warning', ++ `err', `crit', `alert' and `emerg', corresponding to numbers 7 ++ through 0. See the man page for syslog.conf for an explanation of ++ these levels. The default is `warning'. ++ ++ <tag>--log-prefix</tag> Followed by a string of up to 29 characters, ++ this message is sent at the start of the log message, to allow it to ++ be uniquely identified. ++ </descrip> ++ ++ This module is most useful after a limit match, so you don't flood ++ your logs. ++ ++<tag>REJECT</tag> This module has the same effect as `DROP', except ++that the sender is sent an ICMP `port unreachable' error message. ++Note that the ICMP error message is not sent if (see RFC 1122): ++ ++<itemize> ++<item> The packet being filtered was an ICMP error message in the ++first place, or some unknown ICMP type. 
++ ++<item> The packet being filtered was a non-head fragment. ++ ++<item> We've sent too many ICMP error messages to that destination ++recently (see /proc/sys/net/ipv4/icmp_ratelimit). ++</itemize> ++ ++REJECT also takes a `--reject-with' optional argument which alters the ++reply packet used: see the manual page. ++</descrip> ++ ++<sect2>Special Built-In Targets ++ ++<p>There are two special built-in targets: <tt>RETURN</tt> and ++<tt>QUEUE</tt>. ++ ++<p><tt>RETURN</tt> has the same effect as falling off the end of a ++chain: for a rule in a built-in chain, the policy of the chain is ++executed. For a rule in a user-defined chain, the traversal continues ++at the previous chain, just after the rule which jumped to this chain. ++ ++<p><tt>QUEUE</tt> is a special target, which queues the packet for ++userspace processing. For this to be useful, two further components are ++required: ++ ++<itemize> ++<item>a "queue handler", which deals with the actual mechanics of ++passing packets between the kernel and userspace; and ++<item>a userspace application to receive, possibly manipulate, and ++issue verdicts on packets. ++</itemize> ++The standard queue handler for IPv4 iptables is the ip_queue module, ++which is distributed with the kernel and marked as experimental. ++<p> ++The following is a quick example of how to use iptables to queue packets ++for userspace processing: ++<tscreen><verb> ++# modprobe iptable_filter ++# modprobe ip_queue ++# iptables -A OUTPUT -p icmp -j QUEUE ++</verb></tscreen> ++With this rule, locally generated outgoing ICMP packets (as created with, ++say, ping) are passed to the ip_queue module, which then attempts to deliver ++the packets to a userspace application. If no userspace application is ++waiting, the packets are dropped. ++ ++<p>To write a userspace application, use the libipq API. This is ++distributed with iptables. Example code may be found in the testsuite ++tools (e.g. redirect.c) in CVS.
++ ++<p>The status of ip_queue may be checked via: ++<tscreen><verb> ++/proc/net/ip_queue ++</verb></tscreen> ++The maximum length of the queue (i.e. the number of packets delivered ++to userspace with no verdict issued back) may be controlled via: ++<tscreen><verb> ++/proc/sys/net/ipv4/ip_queue_maxlen ++</verb></tscreen> ++The default value for the maximum queue length is 1024. Once this limit ++is reached, new packets will be dropped until the length of the queue falls ++below the limit again. Nice protocols such as TCP interpret dropped packets ++as congestion, and will hopefully back off when the queue fills up. However, ++it may take some experimenting to determine an ideal maximum queue length ++for a given situation if the default value is too small. ++ ++<sect1>Operations on an Entire Chain<label id="chain-ops"> ++ ++<p> ++A very useful feature of <tt>iptables</tt> is the ability to group ++related rules into chains. You can call the chains whatever you want, ++but I recommend using lower-case letters to avoid confusion with the ++built-in chains and targets. Chain names can be up to 31 letters ++long. ++ ++<sect2>Creating a New Chain ++ ++<p> ++Let's create a new chain. Because I am such an imaginative fellow, ++I'll call it <tt>test</tt>. We use the `-N' or `--new-chain' options: ++ ++<tscreen><verb> ++# iptables -N test ++# ++</verb></tscreen> ++ ++<p> ++It's that simple. Now you can put rules in it as detailed above. ++ ++<sect2>Deleting a Chain ++ ++<p> ++Deleting a chain is simple as well, using the `-X' or `--delete-chain' ++options. Why `-X'? Well, all the good letters were taken. ++ ++<tscreen><verb> ++# iptables -X test ++# ++</verb></tscreen> ++ ++<p> ++There are a couple of restrictions to deleting chains: they must be ++empty (see <ref id="flushing" name="Flushing a Chain"> below) and they ++must not be the target of any rule. You can't delete any of the three ++built-in chains.
++ ++<p> ++If you don't specify a chain, then <em>all</em> user-defined chains ++will be deleted, if possible. ++ ++<sect2> Flushing a Chain<label id="flushing"> ++ ++<p> ++There is a simple way of emptying all rules out of a chain, using the ++`-F' (or `--flush') commands. ++ ++<tscreen><verb> ++# iptables -F FORWARD ++# ++</verb></tscreen> ++ ++<p> ++If you don't specify a chain, then <em>all</em> chains will be flushed. ++ ++<sect2>Listing a Chain ++ ++<p> ++You can list all the rules in a chain by using the `-L' (or `--list') ++command. ++ ++<p> ++The `refcnt' listed for each user-defined chain is the number of rules ++which have that chain as their target. This must be zero (and the ++chain be empty) before this chain can be deleted. ++ ++<p> ++If the chain name is omitted, all chains are listed, even empty ones. ++ ++<p> ++There are three options which can accompany `-L'. The `-n' (numeric) ++option is very useful as it prevents <tt>iptables</tt> from trying to ++look up the IP addresses, which (if you are using DNS like most people) ++will cause large delays if your DNS is not set up properly, or you ++have filtered out DNS requests. It also causes TCP and UDP ports to ++be printed out as numbers rather than names. ++ ++<p> ++The `-v' option shows you all the details of the rules, such as the ++packet and byte counters, the TOS comparisons, and the interfaces. ++Otherwise these values are omitted. ++ ++<p> ++Note that the packet and byte counters are printed out using the ++suffixes `K', `M' or `G' for 1000, 1,000,000 and 1,000,000,000 ++respectively. Using the `-x' (expand numbers) flag as well prints the ++full numbers, no matter how large they are. ++ ++<sect2>Resetting (Zeroing) Counters ++ ++<p> ++It is useful to be able to reset the counters. This can be done with ++the `-Z' (or `--zero') option.
++ ++<p> ++Consider the following: ++ ++<tscreen><verb> ++# iptables -L FORWARD ++# iptables -Z FORWARD ++# ++</verb></tscreen> ++ ++In the above example, some packets could pass through between the `-L' ++and `-Z' commands. For this reason, you can use the `-L' and `-Z' ++<em>together</em>, to reset the counters while reading them. ++ ++<sect2>Setting Policy<label id="policy"> ++ ++<p> ++We glossed over what happens when a packet hits the end of a built-in ++chain when we discussed how a packet walks through chains earlier. In ++this case, the <bf>policy</bf> of the chain determines the fate of the ++packet. Only built-in chains (<tt>INPUT</tt>, <tt>OUTPUT</tt> and ++<tt>FORWARD</tt>) have policies, because if a packet falls off the end ++of a user-defined chain, traversal resumes at the previous chain. ++ ++<p> ++The policy can be either <tt>ACCEPT</tt> or <tt>DROP</tt>, for ++example: ++ ++<tscreen><verb> ++# iptables -P FORWARD DROP ++# ++</verb></tscreen> ++ ++<sect> Using ipchains and ipfwadm<label id="oldstyle"> ++ ++<p> There are modules in the netfilter distribution called ipchains.o ++and ipfwadm.o. Insert one of these in your kernel (NOTE: they are ++incompatible with ip_tables.o!). Then you can use ipchains or ipfwadm ++just like the good old days. ++ ++<p> This will be supported for some time yet. I think a reasonable ++formula is 2 * [notice of replacement - initial stable release], ++beyond the date that a stable release of the replacement is available. ++This means that support will probably be dropped in Linux 2.6 or 2.8. ++ ++<sect> Mixing NAT and Packet Filtering ++ ++<p> ++It's common to want to do Network Address Translation (see the NAT ++HOWTO) and packet filtering. The good news is that they mix extremely ++well. ++ ++<p>You design your packet filtering completely ignoring any NAT you ++are doing. The sources and destinations seen by the packet filter ++will be the `real' sources and destinations. 
For example, if you are ++doing DNAT to send any connections to 1.2.3.4 port 80 through to ++10.1.1.1 port 8080, the packet filter would see packets going to ++10.1.1.1 port 8080 (the real destination), not 1.2.3.4 port 80. ++Similarly, you can ignore masquerading: packets will seem to come from ++their real internal IP addresses (say 10.1.1.1), and replies will seem ++to go back there. ++ ++<p>You can use the `state' match extension without making the packet ++filter do any extra work, since NAT requires connection tracking ++anyway. To enhance the simple masquerading example in the NAT HOWTO ++to disallow any new connections from coming in the ppp0 interface, you ++would do this: ++ ++<tscreen><verb> ++# Masquerade out ppp0 ++iptables -t nat -A POSTROUTING -o ppp0 -j MASQUERADE ++ ++# Disallow NEW and INVALID incoming or forwarded packets from ppp0. ++iptables -A INPUT -i ppp0 -m state --state NEW,INVALID -j DROP ++iptables -A FORWARD -i ppp0 -m state --state NEW,INVALID -j DROP ++ ++# Turn on IP forwarding ++echo 1 > /proc/sys/net/ipv4/ip_forward ++</verb></tscreen> ++ ++<sect> Differences Between iptables and ipchains<label id="Appendix-A"> ++ ++<p> ++<itemize> ++<item> Firstly, the names of the built-in chains have changed from ++lower case to UPPER case, because the INPUT and OUTPUT chains now only ++get locally-destined and locally-generated packets. They used to see ++all incoming and all outgoing packets respectively. ++ ++<item> The `-i' flag now means the incoming interface, and only works ++in the INPUT and FORWARD chains. Rules in the FORWARD or OUTPUT ++chains that used `-i' should be changed to `-o'. ++ ++<item> TCP and UDP ports now need to be spelled out with the ++--source-port or --sport (or --destination-port/--dport) options, and ++must be placed after the `-p tcp' or `-p udp' options, as this loads ++the TCP or UDP extensions respectively. ++ ++<item> The TCP -y flag is now --syn, and must be after `-p tcp'. 
++ ++<item> The DENY target is now DROP, finally. ++ ++<item> Zeroing single chains while listing them works. ++ ++<item> Zeroing built-in chains also clears policy counters. ++ ++<item> Listing chains gives you the counters as an atomic snapshot. ++ ++<item> REJECT and LOG are now extended targets, meaning they are ++separate kernel modules. ++ ++<item> Chain names can be up to 31 characters. ++ ++<item> MASQ is now MASQUERADE and uses a different syntax. REDIRECT, ++while keeping the same name, has also undergone a syntax change. See ++the NAT-HOWTO for more information on how to configure both of these. ++ ++<item> The -o option is no longer used to direct packets to the userspace ++device (see -i above). Packets are now sent to userspace via the QUEUE ++target. ++ ++<item> Probably heaps of other things I forgot. ++</itemize> ++ ++<sect> Advice on Packet Filter Design ++ ++<p> ++Common wisdom in the computer security arena is to block everything, ++then open up holes as necessary. This is usually phrased `that which ++is not explicitly allowed is prohibited'. I recommend this approach ++if security is your maximal concern. ++ ++<p>Do not run any services you do not need to, even if you think you ++have blocked access to them. ++ ++<p>If you are creating a dedicated firewall, start by running nothing, ++and blocking all packets, then add services and let packets through as ++required. ++ ++<p>I recommend security in depth: combine tcp-wrappers (for ++connections to the packet filter itself), proxies (for connections ++passing through the packet filter), route verification and packet ++filtering. Route verification is where a packet which comes from an ++unexpected interface is dropped: for example, if your internal network ++has addresses 10.1.1.0/24, and a packet with that source address comes ++in your external interface, it will be dropped.
This can be enabled ++for one interface (ppp0) like so: ++ ++<tscreen><verb> ++# echo 1 > /proc/sys/net/ipv4/conf/ppp0/rp_filter ++# ++</verb></tscreen> ++ ++Or for all existing and future interfaces like this: ++ ++<tscreen><verb> ++# for f in /proc/sys/net/ipv4/conf/*/rp_filter; do ++# echo 1 > $f ++# done ++# ++</verb></tscreen> ++ ++Debian does this by default where possible. If you have asymmetric ++routing (ie. you expect packets coming in from strange directions), ++you will want to disable this filtering on those interfaces. ++ ++<p>Logging is useful when setting up a firewall if something isn't ++working, but on a production firewall, always combine it with the ++`limit' match, to prevent someone from flooding your logs. ++ ++<p>I highly recommend connection tracking for secure systems: it ++introduces some overhead, as all connections are tracked, but is very ++useful for controlling access to your networks. You may need to load ++the `ip_conntrack.o' module if your kernel does not load modules ++automatically, and it's not built into the kernel. If you want to ++accurately track complex protocols, you'll need to load the ++appropriate helper module (eg. `ip_conntrack_ftp.o'). ++ ++<tscreen><verb> ++# iptables -N no-conns-from-ppp0 ++# iptables -A no-conns-from-ppp0 -m state --state ESTABLISHED,RELATED -j ACCEPT ++# iptables -A no-conns-from-ppp0 -m state --state NEW -i ! ppp0 -j ACCEPT ++# iptables -A no-conns-from-ppp0 -i ppp0 -m limit -j LOG --log-prefix "Bad packet from ppp0:" ++# iptables -A no-conns-from-ppp0 -i ! ppp0 -m limit -j LOG --log-prefix "Bad packet not from ppp0:" ++# iptables -A no-conns-from-ppp0 -j DROP ++ ++# iptables -A INPUT -j no-conns-from-ppp0 ++# iptables -A FORWARD -j no-conns-from-ppp0 ++</verb></tscreen> ++ ++<p>Building a good firewall is beyond the scope of this HOWTO, but my ++advice is `always be minimalist'. See the Security HOWTO for more ++information on testing and probing your box. 
++ ++</article> ++ +Index: iptables-1.4.12/Makefile.am +=================================================================== +--- iptables-1.4.12.orig/Makefile.am 2011-11-07 13:57:20.000000000 -0600 ++++ iptables-1.4.12/Makefile.am 2011-11-07 13:58:55.000000000 -0600 +@@ -3,7 +3,7 @@ + ACLOCAL_AMFLAGS = -I m4 + AUTOMAKE_OPTIONS = foreign subdir-objects + +-SUBDIRS = libiptc libxtables ++SUBDIRS = libiptc libxtables howtos + if ENABLE_DEVEL + SUBDIRS += include + endif diff -Nru iptables-1.6.0/debian/patches/9002-libxt_recent-Add-support-for-reap-option.patch iptables-1.6.0/debian/patches/9002-libxt_recent-Add-support-for-reap-option.patch --- iptables-1.6.0/debian/patches/9002-libxt_recent-Add-support-for-reap-option.patch 1970-01-01 00:00:00.000000000 +0000 +++ iptables-1.6.0/debian/patches/9002-libxt_recent-Add-support-for-reap-option.patch 2016-02-15 20:16:33.000000000 +0000 @@ -0,0 +1,26 @@ +Description: Fix support for reap option. +Origin: 79ddbf202a06e6f018e087a328c2ca91e65a8463 +Author: Tim Gardner <tim.gardner@canonical.com> +Last-Update: <2013-06-07> + +Index: iptables/extensions/libxt_recent.c +=================================================================== +--- iptables.orig/extensions/libxt_recent.c 2013-10-23 19:37:20.190616082 -0400 ++++ iptables/extensions/libxt_recent.c 2013-10-23 19:37:20.186616082 -0400 +@@ -170,10 +170,16 @@ + + static void recent_check(struct xt_fcheck_call *cb) + { ++ struct xt_recent_mtinfo *info = cb->data; ++ + if (!(cb->xflags & F_ANY_OP)) + xtables_error(PARAMETER_PROBLEM, + "recent: you must specify one of `--set', `--rcheck' " + "`--update' or `--remove'"); ++ ++ if ((info->check_set & XT_RECENT_REAP) && !info->seconds) ++ xtables_error(PARAMETER_PROBLEM, ++ "recent: you must specify `--seconds' with `--reap'"); + } + + static void recent_print(const void *ip, const struct xt_entry_match *match, diff -Nru iptables-1.6.0/debian/patches/series iptables-1.6.0/debian/patches/series --- 
iptables-1.6.0/debian/patches/series 2016-01-19 09:32:06.000000000 +0000 +++ iptables-1.6.0/debian/patches/series 2016-02-15 20:16:33.000000000 +0000 @@ -7,3 +7,6 @@ 0202-725413-sctp_man_description.patch 0301-install_iptables_apply.patch 0401-580941-iptables_apply_update.patch + +9000-howtos.patch +9002-libxt_recent-Add-support-for-reap-option.patch diff -Nru iptables-1.6.0/debian/rules iptables-1.6.0/debian/rules --- iptables-1.6.0/debian/rules 2016-01-19 09:32:06.000000000 +0000 +++ iptables-1.6.0/debian/rules 2016-02-15 20:34:19.000000000 +0000 @@ -9,7 +9,7 @@ -l$(CURDIR)/debian/build/extensions/.libs _configure := \ - --enable-libipq \ + --disable-libipq \ --enable-devel \ --libdir=/lib \ --with-xtlibdir=/lib/xtables @@ -22,3 +22,8 @@ override_dh_auto_configure: dh_auto_configure -- $(_configure) + +override_dh_install: + mkdir -p debian/tmp/lib/$(DEB_HOST_MULTIARCH) + mv debian/tmp/lib/lib*.so* debian/tmp/lib/$(DEB_HOST_MULTIARCH) + dh_install