bird ospf mtu problem and negotiation freezes on EXSTART/BDR state

Jordan Ostrev Jordan.Ostrev at vivacom.bg
Tue Mar 15 16:12:59 CET 2016


Dear All,
As a new user trying to implement bird version 1.5 into some lab environment, I’ve made my first easy setup:

Cisco ASR box ( with many VRFs ) <----> FreeBSD with Bird installed ( with many FIBS ). I’m trying to peer one of cisco vrf with one of the fib using OSPFv2.

On Cisco side my config looks like:

interface GigabitEthernet0/0/1/9
description -=- GSS server -=-
mtu 9014
negotiation auto
load-interval 30
!
interface GigabitEthernet0/0/1/9.1501
description -=- to-testbed-server-radius -=-
mtu 1514
vrf Corp1
ipv4 address 10.98.251.245 255.255.255.252
encapsulation dot1q 1501
!
router ospf Corp_Clients
vrf Corp1
  area 0
   !
   interface GigabitEthernet0/0/1/9.1501
    mtu-ignore enable
   !
  !
!
!


On FreeBSD side my bird.conf contains:

#log syslog { debug, trace, info, remote, warning, error, auth, fatal, bug };
log syslog all;
log stderr all;
#debug protocols all;
timeformat base     iso long;
timeformat log      iso long;
timeformat protocol iso long;
timeformat route    iso long;
log "/var/log/bird.log" all;
log stderr all;

filter import_OSPF {
  if ( source = RTS_OSPF && net != 0.0.0.0/0 ) then {
    print "net accepted:", net;
    accept;
  }
reject;
}

filter export_OSPF {
  if ( source = RTS_DEVICE ) then {
    print "net accepted:", net;
    ospf_metric2 = 20;
    accept;
  }
  if ( source = RTS_STATIC_DEVICE && net = 0.0.0.0/0 ) then {
    print"net accepted:", net;
    ospf_metric2 = 5;
    accept;
  }
reject;
}

filter ospf_i { accept;}
filter ospf_e { accept;}

router id 10.96.91.12;

table fib0;
table fib1;
table fib2;
table fib3;
table fib4;

debug protocols all;

protocol direct {
#    interface "-lo*", "*";  # Restrict network interfaces it works with
}

protocol kernel kernel_fib0 {
    table fib0;
    scan time 20;
    import all;
    export all;
    kernel table 0;
    learn;
    persist;
}

protocol kernel kernel_fib1 {
    table fib1;
    scan time 20;
    import all;
    export all;
    kernel table 1;
    learn;
    persist;
}

protocol kernel kernel_fib2 {
    table fib2;
    scan time 20;
    import all;
    export all;
    kernel table 2;
    learn;
    persist;
}

protocol kernel kernel_fib3 {
    table fib3;
    scan time 20;
    import all;
    export all;
    kernel table 3;
    learn;
    persist;
}

protocol kernel kernel_fib4 {
    table fib4;
    scan time 20;
    import all;
    export all;
    kernel table 4;
    learn;
    persist;
}


protocol device {
        scan time 10;           # Scan interfaces every 10 seconds
        primary "vlan1501" 10.98.251.246;
}

protocol static static_fib0 {
    table fib0;
#    route 0.0.0.0/0 via 10.96.91.1;
    import all;
}

protocol static static_fib1 {
    table fib1;
    import all;
    export all;
}

protocol static static_fib2 {
    table fib2;
    import all;
}

protocol static static_fib3 {
    table fib3;
    import all;
}

protocol static static_fib4 {
    table fib4;
    import all;
}



protocol ospf Corp_Clients {
   tick 2;
   rfc1583compat no;
   debug all;
#   export filter export_OSPF;
#   import filter import_OSPF;
   import filter ospf_i; # import to kernel
   export filter ospf_e; # export from kernel
   table fib1;
   ecmp yes;
   area 0.0.0.0 {
       stub no;
       interface "vlan1501" {
          type broadcast;
         tx length 1468;
#          real broadcast yes;
       };
       interface "lo1501" {
       };
   };
};


Currently on cisco asr the session freezes on EXSTART/BDR state

* Indicates MADJ interface
# Indicates Neighbor awaiting BFD session up

Neighbors for OSPF Corp_Clients, VRF Corp1

Neighbor ID     Pri   State           Dead Time   Address         Interface
10.96.91.12     1     EXSTART/BDR     00:00:31    10.98.251.246   GigabitEthernet0/0/1/9.1501
    Neighbor is up for 00:00:28

Total neighbor count: 1
root at testbed-server:~ # birdc show int
BIRD 1.5.0 ready.
bce0 DOWN (index=1)
       MultiAccess Broadcast Multicast AdminUp LinkUp MTU=1500
bce1 DOWN (index=2)
       MultiAccess Broadcast Multicast AdminUp LinkUp MTU=9000
lo0 up (index=3)
       MultiAccess AdminUp LinkUp Loopback Ignored MTU=16384
       127.0.0.1/8 (Primary, scope host)
vlan1777 DOWN (index=4)
       MultiAccess Broadcast Multicast AdminDown LinkUp MTU=1500
vlan306 up (index=5)
       MultiAccess Broadcast Multicast AdminUp LinkUp MTU=1500
       10.96.91.12/24 (Primary, scope site)
vlan4094 up (index=6)
       MultiAccess Broadcast Multicast AdminUp LinkUp MTU=9000
       192.168.192.1/24 (Primary, scope site)
vlan4093 up (index=7)
       MultiAccess Broadcast Multicast AdminUp LinkUp MTU=9000
       192.168.255.16/28 (Primary, scope site)
vlan4092 DOWN (index=8)
       MultiAccess Broadcast Multicast AdminUp LinkUp MTU=9000
vlan1501 up (index=9)
       MultiAccess Broadcast Multicast AdminUp LinkUp MTU=1500
       10.98.251.246/30 (Primary, opposite 10.98.251.245, scope site)
lo1501 up (index=10)
       MultiAccess AdminUp LinkUp Loopback Ignored MTU=16384
       192.168.15.1/32 (Primary, scope site)
root at testbed-server:~ # birdc show ospf int
BIRD 1.5.0 ready.
Corp_Clients:
Interface vlan1501 (10.98.251.244/30)
       Type: broadcast
       Area: 0.0.0.0 (0)
       State: DR
       Priority: 1
       Cost: 10
       ECMP weight: 1
       Hello timer: 10
       Wait timer: 40
       Dead timer: 40
       Retransmit timer: 5
       Designed router (ID): 10.96.91.12
       Designed router (IP): 10.98.251.246
       Backup designed router (ID): 0.0.0.0
       Backup designed router (IP): 0.0.0.0
Interface lo1501 (192.168.15.1/32)
       Type: nbma
       Area: 0.0.0.0 (0)
       State: Waiting (stub)
       Priority: 1
       Cost: 10
       ECMP weight: 1
       Hello timer: 10
       Poll timer: 20
       Wait timer: 40
       Dead timer: 40
       Retransmit timer: 5
       Designed router (ID): 0.0.0.0
       Designed router (IP): 0.0.0.0
       Backup designed router (ID): 0.0.0.0
       Backup designed router (IP): 0.0.0.0
root at testbed-server:~ # birdc show ospf stat
BIRD 1.5.0 ready.

area 0.0.0.0

       router 10.96.91.12
             distance 0
             stubnet 10.98.251.244/30 metric 10
             stubnet 192.168.15.1/32 metric 0
             external 10.98.251.246/32 metric2 10000
setfib 1 ping -s 1468 -D 10.98.251.245
PING 10.98.251.245 (10.98.251.245): 1468 data bytes
1476 bytes from 10.98.251.245: icmp_seq=0 ttl=255 time=0.670 ms
1476 bytes from 10.98.251.245: icmp_seq=1 ttl=255 time=5.484 ms
1476 bytes from 10.98.251.245: icmp_seq=2 ttl=255 time=4.700 ms
1476 bytes from 10.98.251.245: icmp_seq=3 ttl=255 time=3.705 ms
1476 bytes from 10.98.251.245: icmp_seq=4 ttl=255 time=2.465 ms
1476 bytes from 10.98.251.245: icmp_seq=5 ttl=255 time=0.646 ms
1476 bytes from 10.98.251.245: icmp_seq=6 ttl=255 time=3.046 ms
^C
--- 10.98.251.245 ping statistics ---
7 packets transmitted, 7 packets received, 0.0% packet loss
round-trip min/avg/max/stddev = 0.646/2.959/5.484/1.724 ms


On Cisco side the debug has following messages into it:

RP/0/RSP0/CPU0:Mar 15 17:02:50.132 : ospf[1019]:  DR/BDR election on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:02:50.132 : ospf[1019]:  Elect BDR Router ID 10.96.91.12(IP Addr 10.98.251.246)
RP/0/RSP0/CPU0:Mar 15 17:02:50.132 : ospf[1019]:  Elect DR Router ID 10.96.237.126(IP Addr 10.98.251.245)
RP/0/RSP0/CPU0:Mar 15 17:02:50.132 : ospf[1019]:        DR: 10.96.237.126(Id) 10.98.251.245(IP Addr)
RP/0/RSP0/CPU0:Mar 15 17:02:50.132 : ospf[1019]:       BDR: 10.96.91.12(Id) 10.98.251.246(IP Addr)
RP/0/RSP0/CPU0:Mar 15 17:02:53.634 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:02:53.634 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:02:53.634 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:02:58.348 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:02:58.348 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:02:58.348 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:02.887 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:02.887 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:02.887 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:07.690 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:07.690 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:07.690 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:12.378 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:12.378 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:12.378 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:16.902 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:16.902 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:16.902 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:21.451 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:21.451 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:21.451 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:26.434 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:26.434 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:26.434 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:31.052 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:31.052 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:31.052 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:35.878 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:35.878 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:35.878 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:40.715 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:40.715 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:40.715 : ospf[1019]:  Send DBD  to 10.96.91.12(10.98.251.246) on GigabitEthernet0/0/1/9.1501 seq 0x36b opt 0x52 flag 0x7 len 32, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]:  Retransmitting DBD to 10.96.91.12 on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]:  10.96.91.12 address 10.98.251.246 on GigabitEthernet0/0/1/9.1501 is dead, state DOWN
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]: %ROUTING-OSPF-5-ADJCHG : Process Corp_Clients, Nbr 10.96.91.12 on GigabitEthernet0/0/1/9.1501 in area 0 from EXSTART to DOWN, Neighbor Down: too many DBD retransmissions, vrf Corp1 vrfid 0x6000000d
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]:  Flooding Stats for nbr 10.96.91.12, LSA-Req Sent: 0 packets, 0 LSAs; LSA Upd rec'ed: 0 packets, 0 LSAs, DBD-rec'ed: 0 packets, 0 LSAs
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]:  Neighbor change Event on intf GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]:  DR/BDR election on GigabitEthernet0/0/1/9.1501
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]:  Elect BDR Router ID 0.0.0.0(IP Addr 0.0.0.0)
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]:  Elect DR Router ID 10.96.237.126(IP Addr 10.98.251.245)
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]:        DR: 10.96.237.126(Id) 10.98.251.245(IP Addr)
RP/0/RSP0/CPU0:Mar 15 17:03:45.312 : ospf[1019]:       BDR: none


Any advices how to proceed in this situation are welcome!





B.R. Jordan
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://trubka.network.cz/pipermail/bird-users/attachments/20160315/e25037e8/attachment.html>


More information about the Bird-users mailing list