nfdump/bin/parse_csv.pl

206 lines
6.1 KiB
Perl
Executable File

#!/usr/bin/perl
#
# Copyright (c) 2016, Peter Haag
# Copyright (c) 2009, Peter Haag
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The names of its contributors must not be used to endorse or promote
# products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
#
use strict;
use warnings;
my $need_tags = 1;
my $in_block = 0;
my $in_summary = 0;
my $block_no = 0;
my $line_no = 0;
my @data_blocks;
my %summary;
my @tags;
#
# The cvs output format consists of one or more output blocks and one summary block.
# Each output block starts with a cvs index line followed by the cvs record lines.
# The index lines describes the order, how each following record is composed.
#
# Example:
# Index line: ts,te,td,sa,da,sp,dp,pr,...
# Record line: 2004-07-11 10:30:00,2004-07-11 10:30:10,10.010,172.16.1.66,192.168.170.100,1024,25,TCP,..
#
# All records are in ASCII readable form. Numbers are not scaled, so each line can easly
# be parsed.
#
# Below you find the list off all indices, defined by nfdump 1.6. The perl script
# automatically parses a cvs output format and stores the record into a data stucture
# which can be used for further processing. This type of representation is very flexible
# as the leading cvs index line describes the following block. This makes the format also
# compatible with furture versions of nfdump.
#
# Indices used in nfdump 1.6
# ts,te,td time records: t-start, t-end, duration
# sa,da src dst address
# sp,dp src, dst port
# pr protocol
# flg flags
# fwd forwarding status
# stos src tos
# ipkt,ibyt input packets/bytes
# opkt,obyt output packets, bytes
# in,out input/output interface SNMP number
# sas,das src, dst AS
# smk,dmk src, dst mask
# dtos dst tos
# dir direction
# nh,nhb nethop IP address, bgp next hop IP
# svln,dvln src, dst vlan id
# ismc,odmc input src, output dst MAC
# idmc,osmc input dst, output src MAC
# mpls1,mpls2,mpls3,mpls4,mpls5,mpls6,mpls7,mpls8,mpls9,mpls10 MPLS label 1-10
# cl,sl,al client server application latency (nfpcapd,nprobe)
# ra router IP
# eng router engine type/id
# exid exporter SysID
#
# usage: ./nfdump -r ... -o csv | ./parse_csv.pl
#
# Start parsing the cvs blocks
while (<>) {
chomp;
if ( $_ =~ /^$/ ) { # empty line
$block_no++;
$line_no = 0;
$need_tags = 1;
next;
}
if ( $_ =~ /^Summary/ ) {
$in_summary = 1;
$need_tags = 1;
$in_block = 0;
next;
}
if ( $need_tags ) {
@tags = split /,/, $_;
$need_tags = 0;
$in_block = 1;
next;
}
if ( $in_summary ) {
my @vals = split /,/, $_;
@summary{@tags} = @vals;
} elsif ( $in_block ) {
my @vals = split /,/, $_;
my %a;
@a{@tags} = @vals;
$data_blocks[$block_no][$line_no++] = \%a;
next;
}
}
# Everything parsed
# Insert your code here.
#
# As an example, for post processing print each record.
for (my $i=0; $i<=$block_no; $i++ ) {
my $lines = $data_blocks[$i];
print "Start of output block\n";
foreach my $line ( @$lines ) {
print "
Start : $$line{'ts'}
End : $$line{'te'}
Duration : $$line{'td'}
Src Addr : $$line{'sa'}
Dst Addr : $$line{'da'}
Src Port : $$line{'sp'}
Dst Port : $$line{'dp'}
Protocol : $$line{'pr'}
Flags : $$line{'flg'}
FWD Status : $$line{'fwd'}
Direction : $$line{'dir'}
Src TOS : $$line{'stos'}
Dst TOS : $$line{'dtos'}
Input IF : $$line{'in'}
Output IF : $$line{'out'}
Src VLAN : $$line{'svln'}
Dst VLAN : $$line{'dvln'}
Next Hop IP : $$line{'nh'}
BGP Next IP : $$line{'nhb'}
Src AS : $$line{'sas'}
Dst AS : $$line{'das'}
Src Mask : $$line{'smk'}
Dst Mask : $$line{'dmk'}
Input Pack : $$line{'ipkt'}
Output Pack : $$line{'opkt'}
Input Bytes : $$line{'ibyt'}
Output Bytes : $$line{'obyt'}
In Src MAC : $$line{'ismc'}
Out Dst MAC : $$line{'odmc'}
In Dst MAC : $$line{'idmc'}
Out Src MAC : $$line{'osmc'}
MPLS 1 : $$line{'mpls1'}
MPLS 2 : $$line{'mpls2'}
MPLS 3 : $$line{'mpls3'}
MPLS 4 : $$line{'mpls4'}
MPLS 5 : $$line{'mpls5'}
MPLS 6 : $$line{'mpls6'}
MPLS 7 : $$line{'mpls7'}
MPLS 8 : $$line{'mpls8'}
MPLS 8 : $$line{'mpls8'}
MPLS 10 : $$line{'mpls10'}
Client latency : $$line{'cl'}
Server latency : $$line{'sl'}
Appl. latency : $$line{'al'}
Router IP : $$line{'ra'}
Engine type/id : $$line{'eng'}
Exporter SysID : $$line{'exid'}
\n";
}
print "\n";
}
#
# Summary lines
print "\nSummary:\n";
print "Number of Flows : $summary{'flows'}\n";
print "Number of Packets : $summary{'packets'}\n";
print "Number of Bytes : $summary{'bytes'}\n";
print "Average pps : $summary{'avg_pps'}\n";
print "Average bps : $summary{'avg_bps'}\n";
print "Average bpp : $summary{'avg_bpp'}\n";
# To view the raw records for debugging purpose, uncomment the lines below
# use Data::Dumper;
# print Dumper(@data_blocks);
# print Dumper(%summary);