天天看點

5. VPP源碼分析(graph node(3))

2.3. vlib_frame_t

從arguments位址開始的記憶體空間是vector空間,作為結點接收資料包索引的緩沖區。

5. VPP源碼分析(graph node(3))

2.3.1. vlib_frame_t記憶體空間

5. VPP源碼分析(graph node(3))

每個線程都會從堆中分配(mmap)一塊記憶體clib_per_cpu_mheaps

vm->heap_base = clib_mem_get_heap(); 預設大小為64MB

每個vlib_frame_t都将在此mheap上申請,并對應一個elt

函數vlib_frame_alloc_to_node用於申請vlib_frame_t,並將其與某個特定node關聯起來

5. VPP源碼分析(graph node(3))

2.3.2. frame_index的獲得

下圖所示為從vlib_frame_t位址算成index的過程,本質上是位址算術算法,做了一次歸一化處理。

5. VPP源碼分析(graph node(3))

當系統為32位系統時,按32Bytes對齊,所以直接通過frame_index * VLIB_FRAME_ALIGN的方式得到frame指針

5. VPP源碼分析(graph node(3))

2.4. vlib_next_frame_t

/* Bookkeeping for one "next" slot: the frame currently attached to it,
   the node runtime it targets, its state flags and an enqueue counter.
   The VLIB_FRAME_* macros defined inside the struct are the bit values
   kept in `flags`; all but VLIB_FRAME_OWNER are aliases of
   VLIB_NODE_FLAG_* values, so they share those bit positions. */
typedef struct {
  /* Frame index. */
  u32 frame_index;

  /* Node runtime for this next. */
  u32 node_runtime_index;

  /* Next frame flags (VLIB_FRAME_* bits defined below). */
  u32 flags;

  /* Reflects node frame-used flag for this next.
     Alias of VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH. */
#define VLIB_FRAME_NO_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH

  /* This next frame owns enqueue to node corresponding to node_runtime_index.
     The only flag here with its own bit (bit 15) rather than a node-flag alias. */
#define VLIB_FRAME_OWNER (1 << 15)

  /* Set when frame has been allocated for this next.
     Reuses the bit value of VLIB_NODE_FLAG_IS_OUTPUT. */
#define VLIB_FRAME_IS_ALLOCATED    VLIB_NODE_FLAG_IS_OUTPUT

  /* Set when frame has been added to pending vector.
     Reuses the bit value of VLIB_NODE_FLAG_IS_DROP. */
#define VLIB_FRAME_PENDING VLIB_NODE_FLAG_IS_DROP

  /* Set when frame is to be freed after dispatch.
     Reuses the bit value of VLIB_NODE_FLAG_IS_PUNT. */
#define VLIB_FRAME_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_IS_PUNT

  /* Set when frame has traced packets.
     Alias of VLIB_NODE_FLAG_TRACE. */
#define VLIB_FRAME_TRACE VLIB_NODE_FLAG_TRACE

  /* Number of vectors enqueued to this next since last overflow. */
  u32 vectors_since_last_overflow;
} vlib_next_frame_t;

2.5. vlib_pending_frame_t

/* A frame pending dispatch by main loop.
   Holds three u32 indices: the node runtime to run, the frame to hand it,
   and where that node's next frames start. */
typedef struct {
  /* Node and runtime for this frame. */
  u32 node_runtime_index;

  /* Frame index (in the heap). */
  u32 frame_index;

  /* Start of next frames for this node. */
  u32 next_frame_index;

  /* Special value for next_frame_index when there is no next frame
     (all bits set, i.e. the largest u32 value). */
#define VLIB_PENDING_FRAME_NO_NEXT_FRAME ((u32) ~0)
} vlib_pending_frame_t;

2.6. vlib_node_t

/* Descriptor for one graph node: its processing function and name,
   statistics, type and indices, flag/state bits, error strings,
   next-node and sibling wiring, enqueue ownership, and the
   format/unformat/validate hooks.  The VLIB_NODE_FLAG_* macros defined
   inside the struct are the bit values kept in `flags`. */
typedef struct vlib_node_t {
  /* Vector processing function for this node. */
  vlib_node_function_t *function;

  /* Node name. */
  u8 *name;

  /* Node name index in elog string table. */
  u32 name_elog_string;

  /* Total statistics for this node. */
  vlib_node_stats_t stats_total;

  /* Saved values as of last clear (or zero if never cleared).
     Current values are always stats_total - stats_last_clear. */
  vlib_node_stats_t stats_last_clear;

  /* Type of this node. */
  vlib_node_type_t type;

  /* Node index. */
  u32 index;

  /* Index of corresponding node runtime. */
  u32 runtime_index;

  /* Runtime data for this node. */
  void *runtime_data;

  /* Node flags (VLIB_NODE_FLAG_* bits defined below; bits 0-7 are used here). */
  u16 flags;

  /* Processing function keeps frame.
     Tells node dispatching code not to free frame after dispatch is done.  */
#define VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH (1 << 0)

  /* Node counts as output/drop/punt node for stats purposes. */
#define VLIB_NODE_FLAG_IS_OUTPUT (1 << 1)
#define VLIB_NODE_FLAG_IS_DROP (1 << 2)
#define VLIB_NODE_FLAG_IS_PUNT (1 << 3)
  /* NOTE(review): presumably marks a handoff node; not described in this excerpt. */
#define VLIB_NODE_FLAG_IS_HANDOFF (1 << 4)

  /* Set if current node runtime has traced vectors. */
#define VLIB_NODE_FLAG_TRACE (1 << 5)

  /* Mode-switch markers for interrupt <-> polling transitions
     (meaning taken from the macro names; setters are not shown here). */
#define VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE (1 << 6)
#define VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE (1 << 7)

  /* State for input nodes. */
  u8 state;

  /* Number of bytes of run time data. */
  u8 runtime_data_bytes;

  /* Number of error codes used by this node. */
  u16 n_errors;

  /* Size of scalar and vector arguments in bytes. */
  u16 scalar_size, vector_size;

  /* Handle/index in error heap for this node. */
  u32 error_heap_handle;
  u32 error_heap_index;

  /* Error strings indexed by error code for this node. */
  char **error_strings;

  /* Vector of next node names.
     Only used before next_nodes array is initialized. */
  char **next_node_names;

  /* Next node indices for this node. */
  u32 *next_nodes;

  /* Name of node that we are sibling of. */
  char *sibling_of;

  /* Bitmap of all of this node's siblings. */
  uword *sibling_bitmap;

  /* Total number of vectors sent to each next node. */
  u64 *n_vectors_by_next_node;

  /* Hash table mapping next node index into slot in
     next_nodes vector.  Quickly determines whether this node
     is connected to given next node and, if so, with which slot. */
  uword *next_slot_by_node;

  /* Bitmap of node indices which feed this node. */
  uword *prev_node_bitmap;

  /* Node/next-index which owns enqueue rights to this node. */
  u32 owner_node_index, owner_next_index;

  /* Buffer format/unformat for this node. */
  format_function_t *format_buffer;
  unformat_function_t *unformat_buffer;

  /* Trace buffer format for this node. */
  format_function_t *format_trace;

  /* Function to validate incoming frames.
     Takes the vlib main, the node runtime and the frame; returns a u8 *
     (NOTE(review): presumably an error string, with NULL meaning valid —
     confirm against callers). */
u8 *(*validate_frame) (struct vlib_main_t * vm, struct vlib_node_runtime_t *, struct vlib_frame_t * f);

  /* for pretty-printing, not typically valid */
  u8 *state_string;
} vlib_node_t;

2.7. vlib_node_main_t

/* Graph-wide node state: the node registry and name lookup, node
   runtimes grouped by type, pending-interrupt bookkeeping, the
   next-frame and pending-frame vectors, timing wheel and process
   bookkeeping, frame-size tables, and the registration list. */
typedef struct {
  /* Public nodes. */
  vlib_node_t **nodes;

  /* Node index hashed by node name. */
  uword *node_by_name;

  /* Node-main flags; the only bit defined here is
     VLIB_NODE_MAIN_RUNTIME_STARTED. */
  u32 flags;
#define VLIB_NODE_MAIN_RUNTIME_STARTED (1 << 0)

  /* Nodes segregated by type for cache locality.
     Does not apply to nodes of type VLIB_NODE_TYPE_INTERNAL. */
  vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE];

  /* Node runtime indices for input nodes with pending interrupts. */
  u32 *pending_interrupt_node_runtime_indices;
  /* NOTE(review): presumably guards the pending-interrupt vector above —
     confirm against the code that takes this lock. */
  clib_spinlock_t pending_interrupt_lock;

  /* Input nodes are switched from/to interrupt to/from polling mode
     when average vector length goes above/below polling/interrupt
     thresholds. */
  u32 polling_threshold_vector_length;
  u32 interrupt_threshold_vector_length;

  /* Vector of next frames. */
  vlib_next_frame_t *next_frames;

  /* Vector of internal node's frames waiting to be called. */
  vlib_pending_frame_t *pending_frames;

  /* Timing wheel for scheduling time-based node dispatch. */
  void *timing_wheel;

  /* NOTE(review): by its name, a pool of timed signal-event records;
     its users are not shown in this excerpt. */
  vlib_signal_timed_event_data_t *signal_timed_event_data_pool;

  /* Opaque data vector added via timing_wheel_advance. */
  u32 *data_from_advancing_timing_wheel;

  /* CPU time of next process to be ready on timing wheel. */
  f64 time_next_process_ready;

  /* Vector of process nodes.
     One for each node of type VLIB_NODE_TYPE_PROCESS. */
  vlib_process_t **processes;

  /* Current running process or ~0 if no process running. */
  u32 current_process_index;

  /* Pool of pending process frames. */
  vlib_pending_frame_t *suspended_process_frames;

  /* Vector of event data vectors pending recycle. */
  void **recycled_event_data_vectors;

  /* Current counts of nodes in each state. */
  u32 input_node_counts_by_state[VLIB_N_NODE_STATE];

  /* Hash of (scalar_size,vector_size) to frame_sizes index. */
  uword *frame_size_hash;

  /* Per-size frame allocation information. */
  vlib_frame_size_t *frame_sizes;

  /* Time of last node runtime stats clear. */
  f64 time_last_runtime_stats_clear;

  /* Node registrations added by constructors */
  vlib_node_registration_t *node_registrations;
} vlib_node_main_t;

2.8. node排程流程

vlib_main_or_worker_loop的主要流程如下:

  1. 處理pre-input結點——polling形式
    5. VPP源碼分析(graph node(3))
  2. 處理input結點——polling形式
    5. VPP源碼分析(graph node(3))
  3. 處理來自control-plane API queue的信号事件
    5. VPP源碼分析(graph node(3))
  4. 處理input結點——interrupt形式
    5. VPP源碼分析(graph node(3))
  5. timing wheel處理
    5. VPP源碼分析(graph node(3))
  6. 執行pending_frames中記錄的資訊
    5. VPP源碼分析(graph node(3))

根據pending_frames裡面記載的資訊進行nodes排程,這裡nodes排程類似于流水線處理方式,一直執行直到pending_frames中無任何資料為止。

  1. pending_frames中的internal結點也許會resume process node,所以需要再跳回timing wheel做一次排程
    5. VPP源碼分析(graph node(3))

繼續閱讀