DPDK开发之基于UDP的DNS服务器

阿里云国内75折 回扣 微信号:monov8
阿里云国际,腾讯云国际,低至75折。AWS 93折 免费开户实名账号 代冲值 优惠多多 微信号:monov8 飞机:@monov6

基于UDP的DNS服务器

背景

在云主机上，如果每次对外请求都要经过 114.114.114.114 主机查询 DNS，这是非常耗时的。可以考虑在内部搭建一个 DNS 服务器，这样就不需要频繁地查询外部 DNS 服务器，从而缩短响应时间。

DNS概念

DNS 就是一个查表的过程：客户端发起请求，服务器返回对应的 IP 地址。DNS 没有磁盘操作，而是纯内存操作，所以影响 DNS 性能的主要因素在网卡上，即取决于网卡的性能。DPDK 可以作为 DNS 服务器业务处理的底层框架。
DNS工作过程

本地DNS服务器

实现逻辑

  1. 绑定53的端口。
  2. 接收一帧数据recvfrom。
  3. 解析数据decode。主要是解出域名。
  4. 查表找到域名对应的IP。
  5. 打包数据encode。
  6. 发送出去sendto。
    DNS协议

环境配置

1导出dpdk环境变量。

cd dpdk路径
# 如 dpdk/dpdk-stable-19.08.2/
# 切换root权限
sudo su 
export RTE_SDK=dpdk路径
export RTE_TARGET=x86_64-native-linux-gcc

2配置dpdk。

./usertools/dpdk-setup.sh

依次执行
43：加载 DPDK UIO 模块，即插入 driver。
44：加载 VFIO 模块，也是一种 driver。
45：加载 KNI 模块，用于将一些数据写回内核。
46：设置巨页（可以避免频繁的页交换），输入 512。
47：设置巨页，同样输入 512。
49：将网卡绑定到 DPDK。执行之前需要先把 eth0 down 掉（执行 sudo ifconfig eth0 down），然后输入 eth0 对应的 PCI 地址，如 0000:03:00.0。
60：退出。

完整代码实现

代码中实现了

  1. udp协议的收发。
  2. KNI将不关注的协议写回内核以及从内核获取响应数据发送到网卡。
  3. 基于udp的dns服务器。
  4. 性能测试可以使用dnsperf工具。

(dns.h)

#ifndef DPDK_DNS_H
#define DPDK_DNS_H

/*
 * Types and entry points of a minimal DNS message codec: decode a query,
 * attach answers to it, and encode the resulting response.
 */

/* The fixed-width integer types used below come from here, so the header
 * can be included on its own. */
#include <stdint.h>


/* Response Type (value written into the RCODE field of a response) */
enum {
  Ok_ResponseType = 0,
  FormatError_ResponseType = 1,
  ServerFailure_ResponseType = 2,
  NameError_ResponseType = 3,
  NotImplemented_ResponseType = 4,
  Refused_ResponseType = 5
};

/* Resource Record Types */
enum {
  A_Resource_RecordType = 1,
  NS_Resource_RecordType = 2,
  CNAME_Resource_RecordType = 5,
  SOA_Resource_RecordType = 6,
  PTR_Resource_RecordType = 12,
  MX_Resource_RecordType = 15,
  TXT_Resource_RecordType = 16,
  AAAA_Resource_RecordType = 28,
  SRV_Resource_RecordType = 33
};

/* Operation Code */
enum {
  QUERY_OperationCode = 0, /* standard query */
  IQUERY_OperationCode = 1, /* inverse query */
  STATUS_OperationCode = 2, /* server status request */
  NOTIFY_OperationCode = 4, /* zone change notification */
  UPDATE_OperationCode = 5 /* dynamic update of resource records */
};

/* Response Code (same numbering as the first Response Type values) */
enum {
  NoError_ResponseCode = 0,
  FormatError_ResponseCode = 1,
  ServerFailure_ResponseCode = 2,
  NameError_ResponseCode = 3
};

/* Query Type (values that are only valid in the question section) */
enum {
  IXFR_QueryType = 251,
  AXFR_QueryType = 252,
  MAILB_QueryType = 253,
  MAILA_QueryType = 254,
  STAR_QueryType = 255
};

/*
* Types.
*/

/* Question Section */
struct Question {
  char *qName;   /* heap-allocated dotted name; released by free_questions() */
  uint16_t qType;
  uint16_t qClass;
  struct Question *next; // for linked list
};

/* Data part of a Resource Record; the active member follows the record type */
union ResourceData {
  struct {
    uint8_t txt_data_len;
    char *txt_data; /* not released by free_resource_records() */
  } txt_record;
  struct {
    uint8_t addr[4];
  } a_record;
  struct {
    uint8_t addr[16];
  } aaaa_record;
};

/* Resource Record Section */
struct ResourceRecord {
  char *name;    /* heap-allocated dotted name; released by free_resource_records() */
  uint16_t type;
  uint16_t class;
  uint32_t ttl;
  uint16_t rd_length;
  union ResourceData rd_data;
  struct ResourceRecord *next; // for linked list
};

struct Message {
  uint16_t id; /* Identifier */

  /* Flags */
  uint16_t qr; /* Query/Response Flag */
  uint16_t opcode; /* Operation Code */
  uint16_t aa; /* Authoritative Answer Flag */
  uint16_t tc; /* Truncation Flag */
  uint16_t rd; /* Recursion Desired */
  uint16_t ra; /* Recursion Available */
  uint16_t rcode; /* Response Code */

  uint16_t qdCount; /* Question Count */
  uint16_t anCount; /* Answer Record Count */
  uint16_t nsCount; /* Authority Record Count */
  uint16_t arCount; /* Additional Record Count */

  /* At least one question; questions are copied to the response 1:1 */
  struct Question *questions;

  /*
  * Resource records to be send back.
  * Every resource record can be in any of the following places.
  * But every place has a different semantic.
  */
  struct ResourceRecord *answers;
  struct ResourceRecord *authorities;
  struct ResourceRecord *additionals;
};


/* Parses the header and questions found in buffer (size bytes) into msg.
 * Returns 0 on success and -1 on failure. */
int decode_msg(struct Message *msg, const uint8_t *buffer, int size);

/* Turns msg into a response by adding answer records for its questions. */
void resolve_query(struct Message *msg);

/* Serializes msg at *buffer and advances *buffer past the written bytes.
 * Returns 0 on success and non-zero on failure. */
int encode_msg(struct Message *msg, uint8_t **buffer);

/* Releases a list of questions, including each qName. */
void free_questions(struct Question *qq);

/* Releases a list of resource records, including each name. */
void free_resource_records(struct ResourceRecord *rr);

/* Prints msg to stdout for debugging. */
void print_message(struct Message *msg);


#endif /* DPDK_DNS_H */

(dpdk-dns.c)

#include <stdio.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netdb.h>
#include <ifaddrs.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>

#include "dns.h"

/* Nominal packet buffer size in bytes (not referenced by the code shown in this file). */
#define BUF_SIZE 1500
/* Smaller of x and y; each argument is evaluated twice, so avoid side effects. */
#define MIN(x, y) ((x) <= (y) ? (x) : (y))


/*
* This software is licensed under the CC0.
*
* This is a _basic_ DNS Server for educational use.
* It does not prevent invalid packets from crashing
* the server.
*
* To test start the program and issue a DNS request:
*  dig @127.0.0.1 -p 9000 foo.bar.com 
*/


/*
* Masks and constants.
*/

/*
 * Bit masks for the 16-bit flags word of the DNS header, most significant
 * bit first: QR(15) OPCODE(14-11) AA(10) TC(9) RD(8) RA(7) Z(6-4) RCODE(3-0).
 * The header code shifts by 15/11/10/9/8/7/0 after masking, so every mask
 * must sit at exactly that bit position.
 */
static const uint32_t QR_MASK = 0x8000;
static const uint32_t OPCODE_MASK = 0x7800;
static const uint32_t AA_MASK = 0x0400;
static const uint32_t TC_MASK = 0x0200;
static const uint32_t RD_MASK = 0x0100;
static const uint32_t RA_MASK = 0x0080; /* bit 7; 0x8000 would alias the QR bit */
static const uint32_t RCODE_MASK = 0x000F;


#undef strdup

/*
 * File-local replacement for strdup(): returns a malloc'ed copy of s that the
 * caller releases with free(). Returns NULL when s is NULL or when the
 * allocation fails.
 */
char *strdup(const char *s) {
	char *copy;

	if (!s)
		return NULL;

	copy = (char*)malloc(strlen(s) + 1);
	if (!copy)
		return NULL;

	strcpy(copy, s);
	return copy;
}



/*
 * Looks up the IPv4 address of domain_name in the built-in table, which
 * holds the single entry "foo.bar.com" -> 192.168.232.133.
 * On a match the four address bytes are stored in addr and 0 is returned;
 * otherwise addr is left untouched and -1 is returned.
 */
static int get_A_Record(uint8_t addr[4], const char domain_name[])
{
  static const uint8_t known_addr[4] = { 192, 168, 232, 133 };

  if (strcmp("foo.bar.com", domain_name) != 0)
    return -1;

  memcpy(addr, known_addr, sizeof known_addr);
  return 0;
}

/*
 * Looks up the IPv6 address of domain_name in the built-in table, which
 * holds the single entry "foo.bar.com" -> fe80::1.
 * On a match the sixteen address bytes are stored in addr and 0 is returned;
 * otherwise addr is left untouched and -1 is returned.
 */
static int get_AAAA_Record(uint8_t addr[16], const char domain_name[])
{
  static const uint8_t known_addr[16] = {
    0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01
  };

  if (strcmp("foo.bar.com", domain_name) != 0)
    return -1;

  memcpy(addr, known_addr, sizeof known_addr);
  return 0;
}

/*
 * Looks up the TXT data of domain_name in the built-in table, which holds
 * the single entry "foo.bar.com" -> "abcdefg".
 * On a match *addr is pointed at a string literal (the caller must not free
 * or modify it) and 0 is returned; otherwise *addr is left untouched and -1
 * is returned.
 */
static int get_TXT_Record(char **addr, const char domain_name[])
{
  const int is_known = (strcmp("foo.bar.com", domain_name) == 0);

  if (!is_known)
    return -1;

  *addr = "abcdefg";
  return 0;
}

/*
* Debugging functions.
*/

/*
static void print_hex(uint8_t *buf, size_t len)
{
  size_t i;
  printf("%zu bytes:\n", len);
  for (i = 0; i < len; ++i)
    printf("%02x ", buf[i]);
  printf("\n");
}
*/

static void print_resource_record(struct ResourceRecord *rr)
{
  int i;
  while (rr) {
    printf("  ResourceRecord { name '%s', type %u, class %u, ttl %u, rd_length %u, ",
      rr->name,
      rr->type,
      rr->class,
      rr->ttl,
      rr->rd_length
   );

    union ResourceData *rd = &rr->rd_data;
    switch (rr->type) {
      case A_Resource_RecordType:
        printf("Address Resource Record { address ");

        for(i = 0; i < 4; ++i)
          printf("%s%u", (i ? "." : ""), rd->a_record.addr[i]);

        printf(" }");
        break;
      case AAAA_Resource_RecordType:
        printf("AAAA Resource Record { address ");

        for(i = 0; i < 16; ++i)
          printf("%s%02x", (i ? ":" : ""), rd->aaaa_record.addr[i]);

        printf(" }");
        break;
      case TXT_Resource_RecordType:
        printf("Text Resource Record { txt_data '%s' }",
          rd->txt_record.txt_data
       );
        break;
      default:
        printf("Unknown Resource Record { ??? }");
    }
    printf("}\n");
    rr = rr->next;
  }
}

void print_message(struct Message *msg)
{
  struct Question *q;

  printf("QUERY { ID: %02x", msg->id);
  printf(". FIELDS: [ QR: %u, OpCode: %u ]", msg->qr, msg->opcode);
  printf(", QDcount: %u", msg->qdCount);
  printf(", ANcount: %u", msg->anCount);
  printf(", NScount: %u", msg->nsCount);
  printf(", ARcount: %u,\n", msg->arCount);

  q = msg->questions;
  while (q) {
    printf("  Question { qName '%s', qType %u, qClass %u }\n",
      q->qName,
      q->qType,
      q->qClass
    );
    q = q->next;
  }

  print_resource_record(msg->answers);
  print_resource_record(msg->authorities);
  print_resource_record(msg->additionals);

  printf("}\n");
}


/*
* Basic memory operations.
*/

/*
 * Reads a big-endian (network order) 16-bit value at *buffer, advances
 * *buffer by two bytes and returns the value in host order.
 * The caller must guarantee that two bytes are readable.
 */
static size_t get16bits(const uint8_t **buffer)
{
  const uint8_t *bytes = *buffer;
  const size_t value = ((size_t)bytes[0] << 8) | (size_t)bytes[1];

  *buffer = bytes + 2;
  return value;
}

/* Stores one byte at *buffer and advances *buffer past it. */
static void put8bits(uint8_t **buffer, uint8_t value)
{
  uint8_t *dst = *buffer;

  dst[0] = value;
  *buffer = dst + 1;
}

/*
 * Stores value at *buffer as two bytes in network (big-endian) order and
 * advances *buffer past them.
 */
static void put16bits(uint8_t **buffer, uint16_t value)
{
  uint8_t *dst = *buffer;

  dst[0] = (uint8_t)(value >> 8);
  dst[1] = (uint8_t)(value & 0xFF);
  *buffer = dst + 2;
}

/*
 * Stores value at *buffer as four bytes in network (big-endian) order and
 * advances *buffer past them.
 */
static void put32bits(uint8_t **buffer, uint32_t value)
{
  uint8_t *dst = *buffer;

  dst[0] = (uint8_t)(value >> 24);
  dst[1] = (uint8_t)((value >> 16) & 0xFF);
  dst[2] = (uint8_t)((value >> 8) & 0xFF);
  dst[3] = (uint8_t)(value & 0xFF);
  *buffer = dst + 4;
}


/*
* Decoding/Encoding functions.
*/

// 3foo3bar3com0 => foo.bar.com
//
// Decodes the wire-format name at *buf, of which at most len bytes may be
// read. On success returns a malloc'ed, NUL-terminated dotted name (the root
// name decodes to "") and advances *buf past the terminating zero byte; the
// caller owns the string. On failure returns NULL and leaves *buf unchanged.
//
// Failure cases: the name is not terminated within len bytes, a label runs
// past len, the dotted form would not fit in 255 characters, the allocation
// fails, or a length byte is above 63 (compression pointers and the reserved
// label types are not supported).
static char *decode_domain_name(const uint8_t **buf, size_t len)
{
  const uint8_t *src = *buf;
  char domain[256];
  size_t in = 0;   /* read offset into src */
  size_t out = 0;  /* write offset into domain */

  while (in < len) {
    size_t label_len = src[in++];

    if (label_len == 0) {
      /* zero-length label terminates the name */
      char *name = malloc(out + 1);
      if (name == NULL)
        return NULL;
      memcpy(name, domain, out);
      name[out] = '\0';
      *buf = src + in;
      return name;
    }

    /* Walk the name by its length bytes: label bytes such as digits or '-'
     * are themselves <= 63 and must never be mistaken for separators. */
    if (label_len > 63)
      return NULL;
    if (label_len > len - in)
      return NULL;
    /* separator (if any) + label + final NUL must fit into domain */
    if (out + (out != 0) + label_len >= sizeof domain)
      return NULL;

    if (out != 0)
      domain[out++] = '.';
    memcpy(domain + out, src + in, label_len);
    out += label_len;
    in += label_len;
  }

  return NULL;
}

// foo.bar.com => 3foo3bar3com0
//
// Writes domain in wire format at *buffer and advances *buffer past the
// terminating zero byte. Empty labels are skipped, so "" encodes to the single
// root byte 0 and a trailing dot ("foo.bar.com.") does not produce an extra
// zero-length label in front of the terminator.
//
// The function cannot report errors: the caller must provide room for
// strlen(domain) + 2 bytes and pass labels of at most 63 characters.
static void encode_domain_name(uint8_t **buffer, const char *domain)
{
  uint8_t *out = *buffer;
  const char *label = domain;

  while (*label != '\0') {
    const char *dot = strchr(label, '.');
    size_t label_len = dot ? (size_t)(dot - label) : strlen(label);

    if (label_len != 0) {
      *out++ = (uint8_t)label_len;
      memcpy(out, label, label_len);
      out += label_len;
    }

    if (dot == NULL)
      break;
    label = dot + 1;
  }

  /* root label terminates the name */
  *out++ = 0;

  *buffer = out;
}


/*
 * Reads the fixed DNS header (six 16-bit words: id, flags and the four
 * section counts) at *buffer into msg and advances *buffer past it.
 * The flags word is split into its fields with the *_MASK constants.
 * No length check is done here; the caller must ensure the header is present.
 */
static void decode_header(struct Message *msg, const uint8_t **buffer)
{
  uint32_t flags;

  msg->id = get16bits(buffer);
  flags = get16bits(buffer);

  msg->qr     = (flags & QR_MASK)     >> 15;
  msg->opcode = (flags & OPCODE_MASK) >> 11;
  msg->aa     = (flags & AA_MASK)     >> 10;
  msg->tc     = (flags & TC_MASK)     >> 9;
  msg->rd     = (flags & RD_MASK)     >> 8;
  msg->ra     = (flags & RA_MASK)     >> 7;
  msg->rcode  = (flags & RCODE_MASK)  >> 0;

  msg->qdCount = get16bits(buffer);
  msg->anCount = get16bits(buffer);
  msg->nsCount = get16bits(buffer);
  msg->arCount = get16bits(buffer);
}

static void encode_header(struct Message *msg, uint8_t **buffer)
{
  put16bits(buffer, msg->id);

  int fields = 0;
  fields |= (msg->qr << 15) & QR_MASK;
  fields |= (msg->rcode << 0) & RCODE_MASK;
  // TODO: insert the rest of the fields
  put16bits(buffer, fields);

  put16bits(buffer, msg->qdCount);
  put16bits(buffer, msg->anCount);
  put16bits(buffer, msg->nsCount);
  put16bits(buffer, msg->arCount);
}

/*
 * Parses a DNS query of size bytes at buffer into msg.
 *
 * msg is expected to be zero-initialized by the caller: decoded questions
 * are prepended to msg->questions. Only the header and the question section
 * are parsed; a message announcing answer or authority records is rejected.
 *
 * Returns 0 on success and -1 on failure (message shorter than the header,
 * unexpected records, undecodable name, truncated question or out of
 * memory). On failure the questions decoded so far stay linked in msg, so
 * the caller can release them with free_questions().
 */
int decode_msg(struct Message *msg, const uint8_t *buffer, int size)
{
  enum { HEADER_LEN = 12, QUESTION_FIXED_LEN = 4 };
  const uint8_t *end;
  unsigned int i;
  uint32_t qcount;

  if (msg == NULL || buffer == NULL || size < HEADER_LEN) {
    printf("Message too short!\n");
    return -1;
  }
  end = buffer + size;

  decode_header(msg, &buffer);

  if (msg->anCount != 0 || msg->nsCount != 0) {
    printf("Only questions expected!\n");
    return -1;
  }

  // parse questions
  qcount = msg->qdCount;
  for (i = 0; i < qcount; ++i) {
    struct Question *q;
    /* only the bytes that are left in the message may be read for the name */
    char *name = decode_domain_name(&buffer, (size_t)(end - buffer));

    if (name == NULL) {
      printf("Failed to decode domain name!\n");
      return -1;
    }

    /* QTYPE and QCLASS must still fit into the message */
    if (end - buffer < QUESTION_FIXED_LEN) {
      printf("Truncated question section!\n");
      free(name);
      return -1;
    }

    q = malloc(sizeof *q);
    if (q == NULL) {
      free(name);
      return -1;
    }

    q->qName = name;
    q->qType = get16bits(&buffer);
    q->qClass = get16bits(&buffer);

    // prepend question to questions list
    q->next = msg->questions;
    msg->questions = q;
  }

  // We do not expect any resource records to parse here.

  return 0;
}

// For every question in the message add a appropiate resource record
// in either section 'answers', 'authorities' or 'additionals'.
void resolve_query(struct Message *msg)
{
  struct ResourceRecord *beg;
  struct ResourceRecord *rr;
  struct Question *q;
  int rc;

  // leave most values intact for response
  msg->qr = 1; // this is a response
  msg->aa = 1; // this server is authoritative
  msg->ra = 0; // no recursion available
  msg->rcode = Ok_ResponseType;

  // should already be 0
  msg->anCount = 0;
  msg->nsCount = 0;
  msg->arCount = 0;

  // for every question append resource records
  q = msg->questions;
  while (q) {
    rr = malloc(sizeof(struct ResourceRecord)); //malloc
    memset(rr, 0, sizeof(struct ResourceRecord));

    rr->name = strdup(q->qName);
    rr->type = q->qType;
    rr->class = q->qClass;
    rr->ttl = 60*60; // in seconds; 0 means no caching

    //printf("Query for '%s'\n", q->qName);

    // We only can only answer two question types so far
    // and the answer (resource records) will be all put
    // into the answers list.
    // This behavior is probably non-standard!
    switch (q->qType) {
      case A_Resource_RecordType:
        rr->rd_length = 4;
        rc = get_A_Record(rr->rd_data.a_record.addr, q->qName);
        if (rc < 0)
        {
          free(rr->name);
          free(rr);
          goto next;
        }
        break;
      case AAAA_Resource_RecordType:
        rr->rd_length = 16;
        rc = get_AAAA_Record(rr->rd_data.aaaa_record.addr, q->qName);
        if (rc < 0)
        {
          free(rr->name);
          free(rr);
          goto next;
        }
        break;
      case TXT_Resource_RecordType:
        rc = get_TXT_Record(&(rr->rd_data.txt_record.txt_data), q->qName);
        if (rc < 0) {
          free(rr->name);
          free(rr);
          goto next;
        }
        int txt_data_len = strlen(rr->rd_data.txt_record.txt_data);
        rr->rd_length = txt_data_len + 1;
        rr->rd_data.txt_record.txt_data_len = txt_data_len;
        break;
      /*
      case NS_Resource_RecordType:
      case CNAME_Resource_RecordType:
      case SOA_Resource_RecordType:
      case PTR_Resource_RecordType:
      case MX_Resource_RecordType:
      case TXT_Resource_RecordType:
      */
      default:
        free(rr);
        msg->rcode = NotImplemented_ResponseType;
        printf("Cannot answer question of type %d.\n", q->qType);
        goto next;
    }

    msg->anCount++;

    // prepend resource record to answers list
    beg = msg->answers;
    msg->answers = rr;
    rr->next = beg;

    // jump here to omit question
    next:

    // process next question
    q = q->next;
  }
}

/* @return 0 upon failure, 1 upon success */
static int encode_resource_records(struct ResourceRecord *rr, uint8_t **buffer)
{
  int i;
  while (rr) {
    // Answer questions by attaching resource sections.
    encode_domain_name(buffer, rr->name);
    put16bits(buffer, rr->type);
    put16bits(buffer, rr->class);
    put32bits(buffer, rr->ttl);
    put16bits(buffer, rr->rd_length);

    switch (rr->type) {
      case A_Resource_RecordType:
        for(i = 0; i < 4; ++i)
          put8bits(buffer, rr->rd_data.a_record.addr[i]);
        break;
      case AAAA_Resource_RecordType:
        for(i = 0; i < 16; ++i)
          put8bits(buffer, rr->rd_data.aaaa_record.addr[i]);
        break;
      case TXT_Resource_RecordType:
        put8bits(buffer, rr->rd_data.txt_record.txt_data_len);
        for(i = 0; i < rr->rd_data.txt_record.txt_data_len; i++)
          put8bits(buffer, rr->rd_data.txt_record.txt_data[i]);
        break;
      default:
        fprintf(stderr, "Unknown type %u. => Ignore resource record.\n", rr->type);
      return 1;
    }

    rr = rr->next;
  }

  return 0;
}

/* @return 0 upon failure, 1 upon success */
int encode_msg(struct Message *msg, uint8_t **buffer)
{
  struct Question *q;
  int rc;

  encode_header(msg, buffer);

  q = msg->questions;
  while (q) {
    encode_domain_name(buffer, q->qName);
    put16bits(buffer, q->qType);
    put16bits(buffer, q->qClass);

    q = q->next;
  }

  rc = 0;
  rc |= encode_resource_records(msg->answers, buffer);
  rc |= encode_resource_records(msg->authorities, buffer);
  rc |= encode_resource_records(msg->additionals, buffer);

  return rc;
}

/*
 * Releases every record of the list starting at rr together with its name.
 * The record data (for example the TXT string) is not released.
 * Passing NULL is allowed and does nothing.
 */
void free_resource_records(struct ResourceRecord *rr)
{
  while (rr != NULL) {
    struct ResourceRecord *current = rr;

    /* step forward before the node is released */
    rr = current->next;
    free(current->name);
    free(current);
  }
}

/*
 * Releases every question of the list starting at qq together with its
 * qName. Passing NULL is allowed and does nothing.
 */
void free_questions(struct Question *qq)
{
  while (qq != NULL) {
    struct Question *current = qq;

    /* step forward before the node is released */
    qq = current->next;
    free(current->qName);
    free(current);
  }
}

(dpdk_udp.c)

#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

#include <rte_kni.h>

#include <stdio.h>
#include <arpa/inet.h>

#include "dns.h"


/* Number of mbufs requested for the packet pool created in main(). */
#define MBUF_NUMBER		8196
/* Capacity of the burst arrays in main(), i.e. the maximum number of packets
 * taken per rx burst (despite the name, not a size in bytes). */
#define MBUF_SIZE		32


/* Compile-time feature switches (1 = compiled in, 0 = left out). */
#define ENABLE_SEND	1
#define ENABLE_KNI_APP 	1	
#define ENABLE_DNS_APP	1	


#define ENABLE_PROMISCUOUS	0

/* UDP destination port that main() treats as DNS traffic. */
#define DNS_UDP_PORT	53

/* Ethernet port id passed to every rte_eth_* call in this file. */
int gDpdkPortId = 0;


#if ENABLE_KNI_APP

/* KNI handle; assigned from rte_kni_alloc() in main(). */
struct rte_kni *global_kni = NULL;

#endif


// Addressing used by alloc_udp_pkt() for outgoing packets. main() fills these
// from a received packet with source and destination swapped, so the values
// are copied as they appear on the wire (network byte order).
#if ENABLE_SEND

static uint8_t gSrcMac[RTE_ETHER_ADDR_LEN];
static uint8_t gDstMac[RTE_ETHER_ADDR_LEN];

// e.g. 192.168.1.123
static uint32_t gSrcIp; 
static uint32_t gDstIp;

static uint16_t gSrcPort;
static uint16_t gDstPort;

#endif

//int encode_udp_pkt()


#if ENABLE_KNI_APP

/*
 * KNI callback (registered as ops.config_network_if in main()) that brings
 * the Ethernet port up or down on request.
 *
 * port_id: Ethernet port to reconfigure.
 * if_up:   non-zero to (re)start the port, zero to stop it.
 *
 * Returns -EINVAL for an invalid port, the negative result of
 * rte_eth_dev_start() when the port cannot be started, and 0 otherwise, so
 * that a failed start is reported to the requester instead of being hidden.
 */
static int g_config_network_if(uint16_t port_id, uint8_t if_up) {

	if (!rte_eth_dev_is_valid_port(port_id)) {
		return -EINVAL;
	}

	int ret = 0;

	// The port is stopped in both cases; "up" restarts it afterwards.
	rte_eth_dev_stop(port_id);

	if (if_up) {
		ret = rte_eth_dev_start(port_id);
		if (ret < 0) {
			printf("Failed to start port : %d\n", port_id);
		}
	}

	return ret;
}


#endif




#if ENABLE_SEND

static struct rte_mbuf *alloc_udp_pkt(struct rte_mempool *pool, uint8_t *data, 
	uint16_t length) {

// 32, 2048 + hdrsize
	struct rte_mbuf *mbuf = rte_pktmbuf_alloc(pool);  //
	if (!mbuf) {
		rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc error\n");
	}

	mbuf->pkt_len = length + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_ether_hdr);
	mbuf->data_len = length + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_ether_hdr);

	uint8_t *msg = rte_pktmbuf_mtod(mbuf, uint8_t*);
	// ether 
	struct rte_ether_hdr *eth = (struct rte_ether_hdr *)msg;
	rte_memcpy(eth->s_addr.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN);
	rte_memcpy(eth->d_addr.addr_bytes, gDstMac, RTE_ETHER_ADDR_LEN);
	eth->ether_type = htons(RTE_ETHER_TYPE_IPV4);
	// 6 + 
	/* 6 bytes    6 bytes    2 bytes
	 +----------+----------+------+
	 | src mac  | dst mac  | type |
	 +----------+----------+------+
	 */

	// iphdr
	struct rte_ipv4_hdr *ip = (struct rte_ipv4_hdr *)(msg + sizeof(struct rte_ether_hdr));
	ip->version_ihl = 0x45;
	ip->type_of_service = 0;
	ip->total_length = htons(length + sizeof(struct rte_ipv4_hdr));
	ip->packet_id = 0;
	ip->fragment_offset = 0;
	ip->time_to_live = 64; // ttl = 64
	ip->next_proto_id = IPPROTO_UDP;
	ip->src_addr = gSrcIp;
	ip->dst_addr = gDstIp;
	
	ip->hdr_checksum = 0;
	ip->hdr_checksum = rte_ipv4_cksum(ip);

	// udphdr
	struct rte_udp_hdr *udp = (struct rte_udp_hdr *)(msg + sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr));
	udp->src_port = gSrcPort;
	udp->dst_port = gDstPort;
	//uint16_t udplen = length - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr);
	udp->dgram_len = htons(length);

	rte_memcpy((uint8_t*)(udp+1), data, length-sizeof(struct rte_udp_hdr));

	udp->dgram_cksum = 0;
	udp->dgram_cksum = rte_ipv4_udptcp_cksum(ip, udp);

	
	
	return mbuf;

}

#endif

// 192.168.1.26

//  echo 1 > /sys/devices/virtual/net/vEth0/carrier

// ifconfig vEth0 192.168.1.33 up

/*
 * Hands one received mbuf to the kernel through KNI. Ownership of the mbuf
 * always leaves the caller: it is freed here when KNI does not take it (or
 * when KNI support is compiled out).
 */
static void dns_pass_to_kernel(struct rte_mbuf *m) {

#if ENABLE_KNI_APP
	if (rte_kni_tx_burst(global_kni, &m, 1) == 0) {
		rte_pktmbuf_free(m);
	}
#else
	rte_pktmbuf_free(m);
#endif

}

#if ENABLE_SEND

/*
 * Transmits one mbuf on queue 0 of the port and frees it when the driver
 * does not accept it, so the mbuf is never leaked.
 */
static void dns_send_or_free(struct rte_mbuf *m) {

	if (rte_eth_tx_burst(gDpdkPortId, 0, &m, 1) == 0) {
		rte_pktmbuf_free(m);
	}

}

#endif

/*
 * Entry point: initializes EAL, the mbuf pool, the Ethernet port and
 * (optionally) a KNI interface, then polls forever:
 *   - packets coming from the kernel through KNI are sent out on the port;
 *   - received non-IPv4 and non-UDP packets are handed to the kernel;
 *   - UDP packets to DNS_UDP_PORT are answered by the DNS code;
 *   - UDP packets to port 8888 are printed and echoed back;
 *   - any other UDP packet is dropped.
 * Every received mbuf is either handed over (KNI) or freed exactly once.
 *
 * Parsing assumes untagged Ethernet frames and IPv4 headers without options.
 */
int main(int argc, char *argv[]) {

	// 4G, hugepage, bind pci 
	if (rte_eal_init(argc, argv) < 0) {

		rte_exit(EXIT_FAILURE, "Error\n");

	}
	//per_lcore_socket_id;
	struct rte_mempool *mbuf_pool = rte_pktmbuf_pool_create("mbufpool", MBUF_NUMBER,0,0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	if (!mbuf_pool) {
		rte_exit(EXIT_FAILURE, "mbuf Error\n");
	}

#if ENABLE_KNI_APP

	if (-1 == rte_kni_init(gDpdkPortId)) {
		rte_exit(EXIT_FAILURE, "kni init failed\n");
	}

#endif

	// setup
	uint16_t nb_rx_queues = 1;
#if ENABLE_SEND
	uint16_t nb_tx_queues = 1;
#else
	uint16_t nb_tx_queues = 0;
#endif
	const struct rte_eth_conf port_conf_default = {
		.rxmode = {.max_rx_pkt_len = RTE_ETHER_MAX_LEN }
	};
	if (rte_eth_dev_configure(gDpdkPortId, nb_rx_queues, nb_tx_queues, &port_conf_default) < 0) {
		rte_exit(EXIT_FAILURE, "rte_eth_dev_configure Error\n");
	}

	if (rte_eth_rx_queue_setup(gDpdkPortId, 0, 128, 
		rte_eth_dev_socket_id(gDpdkPortId), NULL, mbuf_pool) < 0) {
		rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup Error\n");
	}

#if ENABLE_SEND
	
	if (rte_eth_tx_queue_setup(gDpdkPortId, 0, 1024, rte_eth_dev_socket_id(gDpdkPortId),
		NULL) < 0) {
		rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup Error\n");
	}
#endif	

	if (rte_eth_dev_start(gDpdkPortId) < 0) {
		rte_exit(EXIT_FAILURE, "rte_eth_dev_start Error\n");
	}

	// disable
#if ENABLE_PROMISCUOUS
	rte_eth_promiscuous_enable(gDpdkPortId); //
#endif

#if ENABLE_KNI_APP

	struct rte_kni_conf conf;
	memset(&conf, 0, sizeof(conf));

	snprintf(conf.name, RTE_KNI_NAMESIZE, "vEth%d", gDpdkPortId);
	conf.group_id = gDpdkPortId;
	conf.mbuf_size = RTE_MBUF_DEFAULT_BUF_SIZE;

	rte_eth_macaddr_get(gDpdkPortId, (struct rte_ether_addr*)conf.mac_addr);
	rte_eth_dev_get_mtu(gDpdkPortId, &conf.mtu);


	struct rte_kni_ops ops;
	memset(&ops, 0, sizeof(ops));
	ops.port_id = gDpdkPortId;
	ops.config_network_if = g_config_network_if;

	global_kni = rte_kni_alloc(mbuf_pool, &conf, &ops);
	if (!global_kni) {
		// every KNI call below dereferences this handle
		rte_exit(EXIT_FAILURE, "rte_kni_alloc Error\n");
	}

#endif


#if ENABLE_DNS_APP

	// Reused for every request; lists from the previous request are
	// released right before the next one is decoded.
	struct Message msg;
	memset(&msg, 0, sizeof(struct Message));

#endif
	
	while (1) {

		unsigned num_recvd = 0;
		unsigned i = 0;

#if ENABLE_KNI_APP

		// kernel -> wire
		struct rte_mbuf *kni_burst[MBUF_SIZE];
		num_recvd = rte_kni_rx_burst(global_kni, kni_burst, MBUF_SIZE);
		if (num_recvd > MBUF_SIZE) {
			rte_exit(EXIT_FAILURE, "rte_kni_rx_burst Error\n");
		}

		unsigned nb_tx = rte_eth_tx_burst(gDpdkPortId, 0, kni_burst, num_recvd);
		// packets the driver did not take are still ours to free
		for (i = nb_tx;i < num_recvd;i ++) {
			rte_pktmbuf_free(kni_burst[i]);
			kni_burst[i] = NULL;
		}

#endif
	
	
		// wire -> application / kernel
		struct rte_mbuf *mbufs[MBUF_SIZE];
		num_recvd = rte_eth_rx_burst(gDpdkPortId, 0, mbufs, MBUF_SIZE);
		if (num_recvd > MBUF_SIZE) {
			rte_exit(EXIT_FAILURE, "rte_eth_rx_burst Error\n");
		}
		
		for (i = 0;i < num_recvd;i ++) {

			struct rte_mbuf *rx = mbufs[i];

			struct rte_ether_hdr *ehdr = rte_pktmbuf_mtod(rx, struct rte_ether_hdr *);
			if (ehdr->ether_type != rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {

				dns_pass_to_kernel(rx);
#if ENABLE_KNI_APP
				rte_kni_handle_request(global_kni);
#endif
				continue;
			}

			struct rte_ipv4_hdr *iphdr = rte_pktmbuf_mtod_offset(rx, struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr));
			if (iphdr->next_proto_id != IPPROTO_UDP) {

				dns_pass_to_kernel(rx);
				continue;
			}

			struct rte_udp_hdr* udphdr = (struct rte_udp_hdr*)(iphdr + 1);
			uint16_t dst_port = ntohs(udphdr->dst_port);
			uint16_t length = ntohs(udphdr->dgram_len);

			// The announced datagram must contain at least its own header
			// and must lie completely inside the received data.
			uint32_t l4_offset = sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr);
			if (length < sizeof(struct rte_udp_hdr) ||
				l4_offset + length > rte_pktmbuf_data_len(rx)) {
				rte_pktmbuf_free(rx);
				continue;
			}

#if ENABLE_DNS_APP
			if (dst_port == DNS_UDP_PORT) { //dns

				printf("dns request\n");

				// reply goes back to where the request came from
				rte_memcpy(gSrcMac, ehdr->d_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
				rte_memcpy(gDstMac, ehdr->s_addr.addr_bytes, RTE_ETHER_ADDR_LEN);

				rte_memcpy(&gSrcIp, &iphdr->dst_addr, sizeof(uint32_t));
				rte_memcpy(&gDstIp, &iphdr->src_addr, sizeof(uint32_t));

				rte_memcpy(&gSrcPort, &udphdr->dst_port, sizeof(uint16_t));
				rte_memcpy(&gDstPort, &udphdr->src_port, sizeof(uint16_t));

				uint16_t nbytes = length - sizeof(struct rte_udp_hdr);
				uint8_t *data = (uint8_t*)(udphdr + 1);

				free_questions(msg.questions);
				free_resource_records(msg.answers);
				free_resource_records(msg.authorities);
				free_resource_records(msg.additionals);
				memset(&msg, 0, sizeof(struct Message));

				if (decode_msg(&msg, data, nbytes) == 0) {

					resolve_query(&msg);

					// The response is encoded in place over the request
					// payload and then copied into its own mbuf.
					uint8_t *p = data;
					if (encode_msg(&msg, &p) == 0) {

						uint16_t len = p - data;

						struct rte_mbuf *mbuf = alloc_udp_pkt(mbuf_pool, data, len+sizeof(struct rte_udp_hdr));

						dns_send_or_free(mbuf);
					}
				}

				// The request is done in every case; it must not fall
				// through into the echo path below.
				rte_pktmbuf_free(rx);
				continue;
			}
#endif

			if (dst_port != 8888) {
				rte_pktmbuf_free(rx);
				continue;
			}

			// echo service on port 8888
			struct in_addr addr;
			addr.s_addr = iphdr->src_addr;
			printf("src: %s:%d, ", inet_ntoa(addr), ntohs(udphdr->src_port));

			// The payload is printed with an explicit length instead of
			// writing a terminator behind the datagram.
			addr.s_addr = iphdr->dst_addr;
			printf("dst: %s:%d, %.*s\n", inet_ntoa(addr), ntohs(udphdr->dst_port),
				(int)(length - sizeof(struct rte_udp_hdr)), (char *)(udphdr+1));

#if ENABLE_SEND

			rte_memcpy(gSrcMac, ehdr->d_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
			rte_memcpy(gDstMac, ehdr->s_addr.addr_bytes, RTE_ETHER_ADDR_LEN);

			rte_memcpy(&gSrcIp, &iphdr->dst_addr, sizeof(uint32_t));
			rte_memcpy(&gDstIp, &iphdr->src_addr, sizeof(uint32_t));

			rte_memcpy(&gSrcPort, &udphdr->dst_port, sizeof(uint16_t));
			rte_memcpy(&gDstPort, &udphdr->src_port, sizeof(uint16_t));

			struct rte_mbuf *mbuf = alloc_udp_pkt(mbuf_pool, (uint8_t*)(udphdr+1), length);

			dns_send_or_free(mbuf);
#endif

			// the payload has been copied into the reply, release the request
			rte_pktmbuf_free(rx);
		}

#if ENABLE_KNI_APP

		rte_kni_handle_request(global_kni);
#endif
	}
	

}

Makefile

# Builds the dpdk_udp application in one of two ways:
#   - with pkg-config, when a libdpdk package is found;
#   - otherwise with the make fragments below $(RTE_SDK)/mk.

# binary name
APP = dpdk_udp

# all source are stored in SRCS-y
SRCS-y := dpdk_udp.c dpdk-dns.c 
# Build using pkg-config variables if possible
ifeq ($(shell pkg-config --exists libdpdk && echo 0),0)

# Default target is the shared build; build/$(APP) is a symlink to the
# variant that was built last.
all: shared
.PHONY: shared static
shared: build/$(APP)-shared
	ln -sf $(APP)-shared build/$(APP)
static: build/$(APP)-static
	ln -sf $(APP)-static build/$(APP)

PKGCONF=pkg-config --define-prefix

# Compiler and linker flags are taken from the libdpdk pkg-config file.
PC_FILE := $(shell $(PKGCONF) --path libdpdk)
CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk)
LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk)
LDFLAGS_STATIC = -Wl,-Bstatic $(shell $(PKGCONF) --static --libs libdpdk)

# "| build" is an order-only prerequisite: the directory must exist, but its
# timestamp does not trigger a rebuild.
build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)

build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)

build:
	@mkdir -p $@

.PHONY: clean
clean:
	rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
	test -d build && rmdir -p build || true

else

# Fallback path: requires RTE_SDK to point at the DPDK source tree.
ifeq ($(RTE_SDK),)
$(error "Please define RTE_SDK environment variable")
endif

# Default target, detect a build directory, by looking for a path with a .config
RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config)))))

include $(RTE_SDK)/mk/rte.vars.mk

CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)

include $(RTE_SDK)/mk/rte.extapp.mk

endif

注意：dpdk-dns.c 隐含一个问题——它使用 malloc 来分配内存，这在 DPDK 程序中是不合适的，应该改为使用 rte_malloc(…) 来申请内存。

编译和执行

编译

make

运行

./build/dpdk_udp

启动网卡

ifconfig vEth0 192.168.7.26 up

设置可读写内核

echo 1 > /sys/devices/virtual/net/vEth0/carrier

总结

1、DPDK 单独使用时主要用于测试网卡性能。一般 DPDK 会搭配协议栈使用（DPDK + 协议栈 + 适配层 + 应用），集成协议栈之后可以应用在路由器、网关、防火墙等场景。

2、DPDK 不仅可以处理 DNS 协议，也可以处理 http、mqtt 等协议。但是这些协议并非只有 DPDK 可以做，像 redis、mysql 等也可以做。适合使用 DPDK 来开发的场景：

  1. 网卡性能瓶颈导致DNS、NTP等协议主要是查表可以使用DPDK提高网卡性能 。
  2. 协议比较简单。
  3. 没有磁盘操作或磁盘操作较少。

阿里云国内75折 回扣 微信号:monov8
阿里云国际,腾讯云国际,低至75折。AWS 93折 免费开户实名账号 代冲值 优惠多多 微信号:monov8 飞机:@monov6
标签: 服务器