/* * ar8216.h: AR8216 switch driver * * Copyright (C) 2009 Felix Fietkau * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #ifndef __AR8216_H #define __AR8216_H #define BITS(_s, _n) (((1UL << (_n)) - 1) << _s) #define AR8XXX_CAP_GIGE BIT(0) #define AR8XXX_CAP_MIB_COUNTERS BIT(1) #define AR8XXX_NUM_PHYS 5 #define AR8216_PORT_CPU 0 #define AR8216_NUM_PORTS 6 #define AR8216_NUM_VLANS 16 #define AR8316_NUM_VLANS 4096 /* size of the vlan table */ #define AR8X16_MAX_VLANS 128 #define AR8X16_PROBE_RETRIES 10 #define AR8X16_MAX_PORTS 8 /* Atheros specific MII registers */ #define MII_ATH_MMD_ADDR 0x0d #define MII_ATH_MMD_DATA 0x0e #define MII_ATH_DBG_ADDR 0x1d #define MII_ATH_DBG_DATA 0x1e #define AR8216_REG_CTRL 0x0000 #define AR8216_CTRL_REVISION BITS(0, 8) #define AR8216_CTRL_REVISION_S 0 #define AR8216_CTRL_VERSION BITS(8, 8) #define AR8216_CTRL_VERSION_S 8 #define AR8216_CTRL_RESET BIT(31) #define AR8216_REG_FLOOD_MASK 0x002C #define AR8216_FM_UNI_DEST_PORTS BITS(0, 6) #define AR8216_FM_MULTI_DEST_PORTS BITS(16, 6) #define AR8236_FM_CPU_BROADCAST_EN BIT(26) #define AR8236_FM_CPU_BCAST_FWD_EN BIT(25) #define AR8216_REG_GLOBAL_CTRL 0x0030 #define AR8216_GCTRL_MTU BITS(0, 11) #define AR8236_GCTRL_MTU BITS(0, 14) #define AR8316_GCTRL_MTU BITS(0, 14) #define AR8216_REG_VTU 0x0040 #define AR8216_VTU_OP BITS(0, 3) #define AR8216_VTU_OP_NOOP 0x0 #define AR8216_VTU_OP_FLUSH 0x1 #define AR8216_VTU_OP_LOAD 0x2 #define AR8216_VTU_OP_PURGE 0x3 #define AR8216_VTU_OP_REMOVE_PORT 0x4 #define AR8216_VTU_ACTIVE BIT(3) #define AR8216_VTU_FULL BIT(4) 
#define AR8216_VTU_PORT BITS(8, 4) #define AR8216_VTU_PORT_S 8 #define AR8216_VTU_VID BITS(16, 12) #define AR8216_VTU_VID_S 16 #define AR8216_VTU_PRIO BITS(28, 3) #define AR8216_VTU_PRIO_S 28 #define AR8216_VTU_PRIO_EN BIT(31) #define AR8216_REG_VTU_DATA 0x0044 #define AR8216_VTUDATA_MEMBER BITS(0, 10) #define AR8236_VTUDATA_MEMBER BITS(0, 7) #define AR8216_VTUDATA_VALID BIT(11) #define AR8216_REG_ATU_FUNC0 0x0050 #define AR8216_ATU_OP BITS(0, 3) #define AR8216_ATU_OP_NOOP 0x0 #define AR8216_ATU_OP_FLUSH 0x1 #define AR8216_ATU_OP_LOAD 0x2 #define AR8216_ATU_OP_PURGE 0x3 #define AR8216_ATU_OP_FLUSH_UNLOCKED 0x4 #define AR8216_ATU_OP_FLUSH_PORT 0x5 #define AR8216_ATU_OP_GET_NEXT 0x6 #define AR8216_ATU_ACTIVE BIT(3) #define AR8216_ATU_PORT_NUM BITS(8, 4) #define AR8216_ATU_PORT_NUM_S 8 #define AR8216_ATU_FULL_VIO BIT(12) #define AR8216_ATU_ADDR5 BITS(16, 8) #define AR8216_ATU_ADDR5_S 16 #define AR8216_ATU_ADDR4 BITS(24, 8) #define AR8216_ATU_ADDR4_S 24 #define AR8216_REG_ATU_FUNC1 0x0054 #define AR8216_ATU_ADDR3 BITS(0, 8) #define AR8216_ATU_ADDR3_S 0 #define AR8216_ATU_ADDR2 BITS(8, 8) #define AR8216_ATU_ADDR2_S 8 #define AR8216_ATU_ADDR1 BITS(16, 8) #define AR8216_ATU_ADDR1_S 16 #define AR8216_ATU_ADDR0 BITS(24, 8) #define AR8216_ATU_ADDR0_S 24 #define AR8216_REG_ATU_FUNC2 0x0058 #define AR8216_ATU_PORTS BITS(0, 6) #define AR8216_ATU_PORT0 BIT(0) #define AR8216_ATU_PORT1 BIT(1) #define AR8216_ATU_PORT2 BIT(2) #define AR8216_ATU_PORT3 BIT(3) #define AR8216_ATU_PORT4 BIT(4) #define AR8216_ATU_PORT5 BIT(5) #define AR8216_ATU_STATUS BITS(16, 4) #define AR8216_ATU_STATUS_S 16 #define AR8216_REG_ATU_CTRL 0x005C #define AR8216_ATU_CTRL_AGE_EN BIT(17) #define AR8216_ATU_CTRL_AGE_TIME BITS(0, 16) #define AR8216_ATU_CTRL_AGE_TIME_S 0 #define AR8236_ATU_CTRL_RES BIT(20) #define AR8216_REG_MIB_FUNC 0x0080 #define AR8216_MIB_TIMER BITS(0, 16) #define AR8216_MIB_AT_HALF_EN BIT(16) #define AR8216_MIB_BUSY BIT(17) #define AR8216_MIB_FUNC BITS(24, 3) #define AR8216_MIB_FUNC_S 24 
#define AR8216_MIB_FUNC_NO_OP 0x0 #define AR8216_MIB_FUNC_FLUSH 0x1 #define AR8216_MIB_FUNC_CAPTURE 0x3 #define AR8236_MIB_EN BIT(30) #define AR8216_REG_GLOBAL_CPUPORT 0x0078 #define AR8216_GLOBAL_CPUPORT_MIRROR_PORT BITS(4, 4) #define AR8216_GLOBAL_CPUPORT_MIRROR_PORT_S 4 #define AR8216_PORT_OFFSET(_i) (0x0100 * (_i + 1)) #define AR8216_REG_PORT_STATUS(_i) (AR8216_PORT_OFFSET(_i) + 0x0000) #define AR8216_PORT_STATUS_SPEED BITS(0,2) #define AR8216_PORT_STATUS_SPEED_S 0 #define AR8216_PORT_STATUS_TXMAC BIT(2) #define AR8216_PORT_STATUS_RXMAC BIT(3) #define AR8216_PORT_STATUS_TXFLOW BIT(4) #define AR8216_PORT_STATUS_RXFLOW BIT(5) #define AR8216_PORT_STATUS_DUPLEX BIT(6) #define AR8216_PORT_STATUS_LINK_UP BIT(8) #define AR8216_PORT_STATUS_LINK_AUTO BIT(9) #define AR8216_PORT_STATUS_LINK_PAUSE BIT(10) #define AR8216_REG_PORT_CTRL(_i) (AR8216_PORT_OFFSET(_i) + 0x0004) /* port forwarding state */ #define AR8216_PORT_CTRL_STATE BITS(0, 3) #define AR8216_PORT_CTRL_STATE_S 0 #define AR8216_PORT_CTRL_LEARN_LOCK BIT(7) /* egress 802.1q mode */ #define AR8216_PORT_CTRL_VLAN_MODE BITS(8, 2) #define AR8216_PORT_CTRL_VLAN_MODE_S 8 #define AR8216_PORT_CTRL_IGMP_SNOOP BIT(10) #define AR8216_PORT_CTRL_HEADER BIT(11) #define AR8216_PORT_CTRL_MAC_LOOP BIT(12) #define AR8216_PORT_CTRL_SINGLE_VLAN BIT(13) #define AR8216_PORT_CTRL_LEARN BIT(14) #define AR8216_PORT_CTRL_MIRROR_TX BIT(16) #define AR8216_PORT_CTRL_MIRROR_RX BIT(17) #define AR8216_REG_PORT_VLAN(_i) (AR8216_PORT_OFFSET(_i) + 0x0008) #define AR8216_PORT_VLAN_DEFAULT_ID BITS(0, 12) #define AR8216_PORT_VLAN_DEFAULT_ID_S 0 #define AR8216_PORT_VLAN_DEST_PORTS BITS(16, 9) #define AR8216_PORT_VLAN_DEST_PORTS_S 16 /* bit0 added to the priority field of egress frames */ #define AR8216_PORT_VLAN_TX_PRIO BIT(27) /* port default priority */ #define AR8216_PORT_VLAN_PRIORITY BITS(28, 2) #define AR8216_PORT_VLAN_PRIORITY_S 28 /* ingress 802.1q mode */ #define AR8216_PORT_VLAN_MODE BITS(30, 2) #define AR8216_PORT_VLAN_MODE_S 30 #define 
AR8216_REG_PORT_RATE(_i) (AR8216_PORT_OFFSET(_i) + 0x000c) #define AR8216_REG_PORT_PRIO(_i) (AR8216_PORT_OFFSET(_i) + 0x0010) #define AR8216_STATS_RXBROAD 0x00 #define AR8216_STATS_RXPAUSE 0x04 #define AR8216_STATS_RXMULTI 0x08 #define AR8216_STATS_RXFCSERR 0x0c #define AR8216_STATS_RXALIGNERR 0x10 #define AR8216_STATS_RXRUNT 0x14 #define AR8216_STATS_RXFRAGMENT 0x18 #define AR8216_STATS_RX64BYTE 0x1c #define AR8216_STATS_RX128BYTE 0x20 #define AR8216_STATS_RX256BYTE 0x24 #define AR8216_STATS_RX512BYTE 0x28 #define AR8216_STATS_RX1024BYTE 0x2c #define AR8216_STATS_RXMAXBYTE 0x30 #define AR8216_STATS_RXTOOLONG 0x34 #define AR8216_STATS_RXGOODBYTE 0x38 #define AR8216_STATS_RXBADBYTE 0x40 #define AR8216_STATS_RXOVERFLOW 0x48 #define AR8216_STATS_FILTERED 0x4c #define AR8216_STATS_TXBROAD 0x50 #define AR8216_STATS_TXPAUSE 0x54 #define AR8216_STATS_TXMULTI 0x58 #define AR8216_STATS_TXUNDERRUN 0x5c #define AR8216_STATS_TX64BYTE 0x60 #define AR8216_STATS_TX128BYTE 0x64 #define AR8216_STATS_TX256BYTE 0x68 #define AR8216_STATS_TX512BYTE 0x6c #define AR8216_STATS_TX1024BYTE 0x70 #define AR8216_STATS_TXMAXBYTE 0x74 #define AR8216_STATS_TXOVERSIZE 0x78 #define AR8216_STATS_TXBYTE 0x7c #define AR8216_STATS_TXCOLLISION 0x84 #define AR8216_STATS_TXABORTCOL 0x88 #define AR8216_STATS_TXMULTICOL 0x8c #define AR8216_STATS_TXSINGLECOL 0x90 #define AR8216_STATS_TXEXCDEFER 0x94 #define AR8216_STATS_TXDEFER 0x98 #define AR8216_STATS_TXLATECOL 0x9c #define AR8236_REG_PORT_VLAN(_i) (AR8216_PORT_OFFSET((_i)) + 0x0008) #define AR8236_PORT_VLAN_DEFAULT_ID BITS(16, 12) #define AR8236_PORT_VLAN_DEFAULT_ID_S 16 #define AR8236_PORT_VLAN_PRIORITY BITS(29, 3) #define AR8236_PORT_VLAN_PRIORITY_S 28 #define AR8236_REG_PORT_VLAN2(_i) (AR8216_PORT_OFFSET((_i)) + 0x000c) #define AR8236_PORT_VLAN2_MEMBER BITS(16, 7) #define AR8236_PORT_VLAN2_MEMBER_S 16 #define AR8236_PORT_VLAN2_TX_PRIO BIT(23) #define AR8236_PORT_VLAN2_VLAN_MODE BITS(30, 2) #define AR8236_PORT_VLAN2_VLAN_MODE_S 30 #define 
AR8236_STATS_RXBROAD 0x00 #define AR8236_STATS_RXPAUSE 0x04 #define AR8236_STATS_RXMULTI 0x08 #define AR8236_STATS_RXFCSERR 0x0c #define AR8236_STATS_RXALIGNERR 0x10 #define AR8236_STATS_RXRUNT 0x14 #define AR8236_STATS_RXFRAGMENT 0x18 #define AR8236_STATS_RX64BYTE 0x1c #define AR8236_STATS_RX128BYTE 0x20 #define AR8236_STATS_RX256BYTE 0x24 #define AR8236_STATS_RX512BYTE 0x28 #define AR8236_STATS_RX1024BYTE 0x2c #define AR8236_STATS_RX1518BYTE 0x30 #define AR8236_STATS_RXMAXBYTE 0x34 #define AR8236_STATS_RXTOOLONG 0x38 #define AR8236_STATS_RXGOODBYTE 0x3c #define AR8236_STATS_RXBADBYTE 0x44 #define AR8236_STATS_RXOVERFLOW 0x4c #define AR8236_STATS_FILTERED 0x50 #define AR8236_STATS_TXBROAD 0x54 #define AR8236_STATS_TXPAUSE 0x58 #define AR8236_STATS_TXMULTI 0x5c #define AR8236_STATS_TXUNDERRUN 0x60 #define AR8236_STATS_TX64BYTE 0x64 #define AR8236_STATS_TX128BYTE 0x68 #define AR8236_STATS_TX256BYTE 0x6c #define AR8236_STATS_TX512BYTE 0x70 #define AR8236_STATS_TX1024BYTE 0x74 #define AR8236_STATS_TX1518BYTE 0x78 #define AR8236_STATS_TXMAXBYTE 0x7c #define AR8236_STATS_TXOVERSIZE 0x80 #define AR8236_STATS_TXBYTE 0x84 #define AR8236_STATS_TXCOLLISION 0x8c #define AR8236_STATS_TXABORTCOL 0x90 #define AR8236_STATS_TXMULTICOL 0x94 #define AR8236_STATS_TXSINGLECOL 0x98 #define AR8236_STATS_TXEXCDEFER 0x9c #define AR8236_STATS_TXDEFER 0xa0 #define AR8236_STATS_TXLATECOL 0xa4 #define AR8316_REG_POSTRIP 0x0008 #define AR8316_POSTRIP_MAC0_GMII_EN BIT(0) #define AR8316_POSTRIP_MAC0_RGMII_EN BIT(1) #define AR8316_POSTRIP_PHY4_GMII_EN BIT(2) #define AR8316_POSTRIP_PHY4_RGMII_EN BIT(3) #define AR8316_POSTRIP_MAC0_MAC_MODE BIT(4) #define AR8316_POSTRIP_RTL_MODE BIT(5) #define AR8316_POSTRIP_RGMII_RXCLK_DELAY_EN BIT(6) #define AR8316_POSTRIP_RGMII_TXCLK_DELAY_EN BIT(7) #define AR8316_POSTRIP_SERDES_EN BIT(8) #define AR8316_POSTRIP_SEL_ANA_RST BIT(9) #define AR8316_POSTRIP_GATE_25M_EN BIT(10) #define AR8316_POSTRIP_SEL_CLK25M BIT(11) #define AR8316_POSTRIP_HIB_PULSE_HW BIT(12) 
#define AR8316_POSTRIP_DBG_MODE_I BIT(13) #define AR8316_POSTRIP_MAC5_MAC_MODE BIT(14) #define AR8316_POSTRIP_MAC5_PHY_MODE BIT(15) #define AR8316_POSTRIP_POWER_DOWN_HW BIT(16) #define AR8316_POSTRIP_LPW_STATE_EN BIT(17) #define AR8316_POSTRIP_MAN_EN BIT(18) #define AR8316_POSTRIP_PHY_PLL_ON BIT(19) #define AR8316_POSTRIP_LPW_EXIT BIT(20) #define AR8316_POSTRIP_TXDELAY_S0 BIT(21) #define AR8316_POSTRIP_TXDELAY_S1 BIT(22) #define AR8316_POSTRIP_RXDELAY_S0 BIT(23) #define AR8316_POSTRIP_LED_OPEN_EN BIT(24) #define AR8316_POSTRIP_SPI_EN BIT(25) #define AR8316_POSTRIP_RXDELAY_S1 BIT(26) #define AR8316_POSTRIP_POWER_ON_SEL BIT(31) /* port speed */ enum { AR8216_PORT_SPEED_10M = 0, AR8216_PORT_SPEED_100M = 1, AR8216_PORT_SPEED_1000M = 2, AR8216_PORT_SPEED_ERR = 3, }; /* ingress 802.1q mode */ enum { AR8216_IN_PORT_ONLY = 0, AR8216_IN_PORT_FALLBACK = 1, AR8216_IN_VLAN_ONLY = 2, AR8216_IN_SECURE = 3 }; /* egress 802.1q mode */ enum { AR8216_OUT_KEEP = 0, AR8216_OUT_STRIP_VLAN = 1, AR8216_OUT_ADD_VLAN = 2 }; /* port forwarding state */ enum { AR8216_PORT_STATE_DISABLED = 0, AR8216_PORT_STATE_BLOCK = 1, AR8216_PORT_STATE_LISTEN = 2, AR8216_PORT_STATE_LEARN = 3, AR8216_PORT_STATE_FORWARD = 4 }; enum { AR8XXX_VER_AR8216 = 0x01, AR8XXX_VER_AR8236 = 0x03, AR8XXX_VER_AR8316 = 0x10, AR8XXX_VER_AR8327 = 0x12, AR8XXX_VER_AR8337 = 0x13, }; #define AR8XXX_NUM_ARL_RECORDS 100 enum arl_op { AR8XXX_ARL_INITIALIZE, AR8XXX_ARL_GET_NEXT }; struct arl_entry { u8 port; u8 mac[6]; }; struct ar8xxx_priv; struct ar8xxx_mib_desc { unsigned int size; unsigned int offset; const char *name; }; struct ar8xxx_chip { unsigned long caps; bool config_at_probe; bool mii_lo_first; /* parameters to calculate REG_PORT_STATS_BASE */ unsigned reg_port_stats_start; unsigned reg_port_stats_length; int (*hw_init)(struct ar8xxx_priv *priv); void (*cleanup)(struct ar8xxx_priv *priv); const char *name; int vlans; int ports; const struct switch_dev_ops *swops; void (*init_globals)(struct ar8xxx_priv *priv); void 
(*init_port)(struct ar8xxx_priv *priv, int port); void (*setup_port)(struct ar8xxx_priv *priv, int port, u32 members); u32 (*read_port_status)(struct ar8xxx_priv *priv, int port); u32 (*read_port_eee_status)(struct ar8xxx_priv *priv, int port); int (*atu_flush)(struct ar8xxx_priv *priv); int (*atu_flush_port)(struct ar8xxx_priv *priv, int port); void (*vtu_flush)(struct ar8xxx_priv *priv); void (*vtu_load_vlan)(struct ar8xxx_priv *priv, u32 vid, u32 port_mask); void (*phy_fixup)(struct ar8xxx_priv *priv, int phy); void (*set_mirror_regs)(struct ar8xxx_priv *priv); void (*get_arl_entry)(struct ar8xxx_priv *priv, struct arl_entry *a, u32 *status, enum arl_op op); int (*sw_hw_apply)(struct switch_dev *dev); const struct ar8xxx_mib_desc *mib_decs; unsigned num_mibs; unsigned mib_func; }; struct ar8xxx_priv { struct switch_dev dev; struct mii_bus *mii_bus; struct phy_device *phy; int (*get_port_link)(unsigned port); const struct net_device_ops *ndo_old; struct net_device_ops ndo; struct mutex reg_mutex; u8 chip_ver; u8 chip_rev; const struct ar8xxx_chip *chip; void *chip_data; bool initialized; bool port4_phy; char buf[2048]; struct arl_entry arl_table[AR8XXX_NUM_ARL_RECORDS]; char arl_buf[AR8XXX_NUM_ARL_RECORDS * 32 + 256]; bool link_up[AR8X16_MAX_PORTS]; bool init; struct mutex mib_lock; struct delayed_work mib_work; int mib_next_port; u64 *mib_stats; struct list_head list; unsigned int use_count; /* all fields below are cleared on reset */ bool vlan; u16 vlan_id[AR8X16_MAX_VLANS]; u8 vlan_table[AR8X16_MAX_VLANS]; u8 vlan_tagged; u16 pvid[AR8X16_MAX_PORTS]; /* mirroring */ bool mirror_rx; bool mirror_tx; int source_port; int monitor_port; }; u32 ar8xxx_mii_read32(struct ar8xxx_priv *priv, int phy_id, int regnum); void ar8xxx_mii_write32(struct ar8xxx_priv *priv, int phy_id, int regnum, u32 val); u32 ar8xxx_read(struct ar8xxx_priv *priv, int reg); void ar8xxx_write(struct ar8xxx_priv *priv, int reg, u32 val); u32 ar8xxx_rmw(struct ar8xxx_priv *priv, int reg, u32 mask, 
u32 val); void ar8xxx_phy_dbg_write(struct ar8xxx_priv *priv, int phy_addr, u16 dbg_addr, u16 dbg_data); void ar8xxx_phy_mmd_write(struct ar8xxx_priv *priv, int phy_addr, u16 addr, u16 data); u16 ar8xxx_phy_mmd_read(struct ar8xxx_priv *priv, int phy_addr, u16 addr); void ar8xxx_phy_init(struct ar8xxx_priv *priv); int ar8xxx_sw_set_vlan(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_get_vlan(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_set_reset_mibs(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_set_mirror_rx_enable(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_get_mirror_rx_enable(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_set_mirror_tx_enable(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_get_mirror_tx_enable(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_set_mirror_monitor_port(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_get_mirror_monitor_port(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_set_mirror_source_port(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_get_mirror_source_port(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_set_pvid(struct switch_dev *dev, int port, int vlan); int ar8xxx_sw_get_pvid(struct switch_dev *dev, int port, int *vlan); int ar8xxx_sw_hw_apply(struct switch_dev *dev); int ar8xxx_sw_reset_switch(struct switch_dev *dev); int ar8xxx_sw_get_port_link(struct switch_dev *dev, int port, struct switch_port_link *link); int ar8xxx_sw_set_port_reset_mib(struct switch_dev *dev, const struct switch_attr *attr, struct 
switch_val *val); int ar8xxx_sw_get_port_mib(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_get_arl_table(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_set_flush_arl_table(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8xxx_sw_set_flush_port_arl_table(struct switch_dev *dev, const struct switch_attr *attr, struct switch_val *val); int ar8216_wait_bit(struct ar8xxx_priv *priv, int reg, u32 mask, u32 val); static inline struct ar8xxx_priv * swdev_to_ar8xxx(struct switch_dev *swdev) { return container_of(swdev, struct ar8xxx_priv, dev); } static inline bool ar8xxx_has_gige(struct ar8xxx_priv *priv) { return priv->chip->caps & AR8XXX_CAP_GIGE; } static inline bool ar8xxx_has_mib_counters(struct ar8xxx_priv *priv) { return priv->chip->caps & AR8XXX_CAP_MIB_COUNTERS; } static inline bool chip_is_ar8216(struct ar8xxx_priv *priv) { return priv->chip_ver == AR8XXX_VER_AR8216; } static inline bool chip_is_ar8236(struct ar8xxx_priv *priv) { return priv->chip_ver == AR8XXX_VER_AR8236; } static inline bool chip_is_ar8316(struct ar8xxx_priv *priv) { return priv->chip_ver == AR8XXX_VER_AR8316; } static inline bool chip_is_ar8327(struct ar8xxx_priv *priv) { return priv->chip_ver == AR8XXX_VER_AR8327; } static inline bool chip_is_ar8337(struct ar8xxx_priv *priv) { return priv->chip_ver == AR8XXX_VER_AR8337; } static inline void ar8xxx_reg_set(struct ar8xxx_priv *priv, int reg, u32 val) { ar8xxx_rmw(priv, reg, 0, val); } static inline void ar8xxx_reg_clear(struct ar8xxx_priv *priv, int reg, u32 val) { ar8xxx_rmw(priv, reg, val, 0); } static inline void split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) { regaddr >>= 1; *r1 = regaddr & 0x1e; regaddr >>= 5; *r2 = regaddr & 0x7; regaddr >>= 3; *page = regaddr & 0x1ff; } static inline void wait_for_page_switch(void) { udelay(5); } #endif 'n470' href='#n470'>470 471 472 473 474 475 476 477 478 479 480 
481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 
981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029
/******************************************************************************
 * tools/xenbaked.c
 *
 * Tool for collecting raw trace buffer data from Xen and 
 *  performing some accumulation operations and other processing
 *  on it.
 *
 * Copyright (C) 2004 by Intel Research Cambridge
 * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins
 *
 * Authors: Diwaker Gupta, diwaker.gupta@hp.com
 *          Rob Gardner, rob.gardner@hp.com
 *          Lucy Cherkasova, lucy.cherkasova@hp.com
 * Much code based on xentrace, authored by Mark Williamson, mark.a.williamson@intel.com
 * Date:   November, 2005
 * 
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <argp.h>
#include <signal.h>
#include <xenctrl.h>
#include <xen/xen.h>
#include <string.h>

#include "xc_private.h"
/* Minimal local definition of atomic_t plus a plain (non-atomic) accessor
 * that simply reads the counter field.
 * NOTE(review): presumably supplied here because <xen/trace.h>, included
 * just below, expects these names - confirm against that header. */
typedef struct { int counter; } atomic_t;
#define _atomic_read(v)		((v).counter)

#include <xen/trace.h>
#include "xenbaked.h"

/* NOTE(review): <stdio.h> is already included above and declares stderr;
 * this re-declaration looks redundant - confirm before removing. */
extern FILE *stderr;

/***** Compile time configuration of defaults ********************************/

/* when we've got more records than this waiting, we log it to the output */
#define NEW_DATA_THRESH 1

/* sleep for this long (milliseconds) between checking the trace buffers */
#define POLL_SLEEP_MILLIS 100

/* Size of time period represented by each sample (milliseconds, going by
 * the macro name) */
#define MS_PER_SAMPLE 100

/* CPU Frequency.  MHZ expands to nothing and only labels the unit, so
 * CPU_FREQ expands to the bare number 2660. */
#define MHZ
#define CPU_FREQ 2660 MHZ

/***** The code **************************************************************/

/* Run-time settings for the tool. */
typedef struct settings_st {
    char *outfile;                  /* NOTE(review): presumably the output file name - not used in the visible code */
    struct timespec poll_sleep;     /* delay passed to nanosleep() between polls of the trace buffers */
    unsigned long new_data_thresh;  /* NOTE(review): presumably defaults to NEW_DATA_THRESH - confirm */
    unsigned long ms_per_sample;    /* NOTE(review): presumably defaults to MS_PER_SAMPLE - confirm */
    double cpu_freq;                /* set by get_num_cpus() to physinfo.cpu_khz / 1000.0 */
} settings_t;

/* program-wide settings */
settings_t opts;

/* gets set if we get a SIGHUP: written by close_handler() and polled by the
 * trace-buffer monitoring loop.
 * NOTE(review): a flag written from a signal handler is normally declared
 * volatile sig_atomic_t - consider changing the type. */
int interrupted = 0;
/* number of records processed; reported by dump_stats() */
int rec_count = 0;
/* start of the run; dump_stats() subtracts it from the current time */
time_t start_time;
/* NOTE(review): not referenced in the visible part of the file */
int dom0_flips = 0;

/* QoS data: the (currently disabled) code in check_gotten_sum() points
 * new_qos at cpu_qos_data[i] for each cpu i in turn. */
_new_qos_data *new_qos;
_new_qos_data **cpu_qos_data;


/* Clamp X to at most NDOMAINS-1; smaller values (including negative ones)
 * are passed through unchanged.  The argument is fully parenthesised so
 * that expressions such as ID(a & b) group as intended.  X is still
 * evaluated twice, so do not pass an argument with side effects. */
#define ID(X) (((X) > (NDOMAINS - 1)) ? (NDOMAINS - 1) : (X))

// array of currently running domains, indexed by cpu
int *running = NULL;

// number of cpu's on this platform
int NCPU = 0;


/**
 * init_current - allocate the per-cpu "currently running domain" table
 * @ncpu: number of cpus, i.e. number of entries in the table
 *
 * Allocates a zero-filled table of @ncpu ints, publishes it through the
 * global 'running', records @ncpu in NCPU and prints a one-line summary.
 * Any table left over from an earlier call is released.  If the allocation
 * fails the process reports the error and exits.
 */
void init_current(int ncpu)
{
  int *table = calloc((size_t)ncpu, sizeof *table);

  /* calloc(0, ...) may legitimately return NULL, so only treat a NULL
   * result as fatal when entries were actually requested. */
  if (table == NULL && ncpu != 0) {
    perror("Failed to allocate the running-domain table");
    exit(EXIT_FAILURE);
  }

  free(running);   /* free(NULL) is a no-op on the first call */
  running = table;
  NCPU = ncpu;
  printf("Initialized with %d %s\n", ncpu, (ncpu == 1) ? "cpu" : "cpu's");
}

/* Return 1 if @domain is the domain recorded as running on @cpu, else 0. */
int is_current(int domain, int cpu)
{
  return (running[cpu] == domain) ? 1 : 0;
}


// look up which domain is recorded as running on the given cpu
int current(int cpu)
{
  const int *slot = running + cpu;

  return *slot;
}

// record that the given domain is now the one running on the given cpu
void set_current(int cpu, int domain)
{
  int *slot = running + cpu;

  *slot = domain;
}



/* Signal handler: just raises the global 'interrupted' flag.  The signal
 * number itself is not examined. */
void close_handler(int signal)
{
    (void)signal;

    interrupted = 1;
}

/* Debug helper, currently compiled out: prints the cpu number together with
 * the cycles, event and data[0] fields of one trace record. */
#if 0
void dump_record(int cpu, struct t_rec *x)
{
    printf("record: cpu=%x, tsc=%lx, event=%x, d1=%lx\n", 
            cpu, x->cycles, x->event, x->data[0]);
}
#endif

/**
 * millis_to_timespec - convert a time in milliseconds to a struct timespec
 * @millis:             time interval in milliseconds
 *
 * Returns a timespec whose tv_sec holds the whole seconds and whose tv_nsec
 * holds the remaining milliseconds expressed in nanoseconds.
 */
struct timespec millis_to_timespec(unsigned long millis)
{
    struct timespec spec;

    spec.tv_sec = millis / 1000;
    /* 1 ms = 1,000,000 ns (multiplying by 1000 would only give microseconds) */
    spec.tv_nsec = (millis % 1000) * 1000000L;

    return spec;
}


/* One row of the event statistics table. */
typedef struct 
{
    int event_count;    /* number of times log_event() has seen this event */
    int event_id;       /* trace event id this row is matched against */
    char *text;         /* label printed by dump_stats(); NULL marks the end of the table */
} stat_map_t;

/* Per-event counters, all starting at zero.  Row 0 ("Other") is the
 * catch-all that log_event() bumps when an event id matches no row; the
 * final row, whose text pointer is null, terminates the table. */
stat_map_t stat_map[] = {
    { 0,       0, 	    "Other" },
    { 0, TRC_SCHED_DOM_ADD, "Add Domain" },
    { 0, TRC_SCHED_DOM_REM, "Remove Domain" },
    { 0, TRC_SCHED_SLEEP, "Sleep" },
    { 0, TRC_SCHED_WAKE,  "Wake" },
    { 0, TRC_SCHED_BLOCK,  "Block" },
    { 0, TRC_SCHED_SWITCH,  "Switch" },
    { 0, TRC_SCHED_S_TIMER_FN, "Timer Func"},
    { 0, TRC_SCHED_SWITCH_INFPREV,  "Switch Prev" },
    { 0, TRC_SCHED_SWITCH_INFNEXT,  "Switch Next" },
    { 0, TRC_MEM_PAGE_GRANT_MAP,  "Page Map" },
    { 0, TRC_MEM_PAGE_GRANT_UNMAP,  "Page Unmap" },
    { 0, TRC_MEM_PAGE_GRANT_TRANSFER,  "Page Transfer" },
    { 0,      0, 		 0  }
};


/* Consistency check on the accumulated QoS data.  The whole body is
 * currently compiled out with #if 0, so calling this does nothing. */
void check_gotten_sum(void)
{
#if 0
    uint64_t sum, ns;
    extern uint64_t total_ns_gotten(uint64_t*);
    double percent;
    int i;

    /* for every cpu: select its QoS data, ask total_ns_gotten() for the sum
     * and print that sum both raw and as a percentage of ns */
    for (i=0; i<NCPU; i++) {
      new_qos = cpu_qos_data[i];
      ns = billion;
      sum = total_ns_gotten(&ns);

      printf("[cpu%d] ns_gotten over all domains = %lldns, over %lldns\n",
	      i, sum, ns);
      percent = (double) sum;
      percent = (100.0*percent) / (double)ns;
      printf(" ==> ns_gotten = %7.3f%%\n", percent);
    }
#endif
}



/**
 * dump_stats - print the per-event counters and overall throughput
 *
 * Prints one line per row of stat_map, then the total number of records
 * processed, the elapsed wall-clock time since start_time and the average
 * number of records per second, and finally calls check_gotten_sum().
 */
void dump_stats(void) 
{
    stat_map_t *smt;
    time_t end_time;
    long run_secs;
    long per_second;

    time(&end_time);

    run_secs = (long)(end_time - start_time);

    printf("Event counts:\n");
    for (smt = stat_map; smt->text != NULL; smt++)
        printf("%08d\t%s\n", smt->event_count, smt->text);

    /* A run shorter than one second has run_secs == 0; count it as a single
     * second rather than dividing by zero.  The rate is computed as a long
     * so that it matches the %ld conversion whatever type time_t is. */
    per_second = (run_secs > 0) ? (rec_count / run_secs) : rec_count;

    printf("processed %d total records in %d seconds (%ld per second)\n",
            rec_count, (int)run_secs, per_second);

    check_gotten_sum();
}

/* Count one occurrence of @event_id: bump the counter of the first stat_map
 * row with that id, or the "Other" row (index 0) when no row matches. */
void log_event(int event_id) 
{
    stat_map_t *entry;

    for (entry = stat_map; entry->text != NULL; entry++) {
        if (entry->event_id != event_id)
            continue;

        entry->event_count++;
        return;
    }

    /* ran off the end of the table without a match */
    stat_map[0].event_count++;
}



/**
 * get_tbufs - get pointer to and size of the trace buffers
 * @mfn:  location to store the mfn of the trace buffers to
 * @size: location to store the size of a trace buffer to
 *
 * Issues a DOM0_TBUFCONTROL / DOM0_TBUF_GET_INFO request and copies the
 * returned buffer_mfn and size into *@mfn and *@size.  Exits the process
 * if the control interface cannot be opened or the request fails.
 */
void get_tbufs(unsigned long *mfn, unsigned long *size)
{
    int ret;
    dom0_op_t op;                        /* dom0 op we'll build             */
    int xc_handle = xc_interface_open(); /* for accessing control interface */

    /* same check map_tbufs() performs on its handle */
    if ( xc_handle < 0 )
    {
        PERROR("Failure to open the Xen control interface");
        exit(EXIT_FAILURE);
    }

    op.cmd = DOM0_TBUFCONTROL;
    op.interface_version = DOM0_INTERFACE_VERSION;
    op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;

    ret = do_dom0_op(xc_handle, &op);

    if ( ret != 0 )
    {
        /* Report before closing the handle.
         * NOTE(review): PERROR presumably prints errno, which the close
         * could otherwise overwrite - confirm against xc_private.h. */
        PERROR("Failure to get trace buffer pointer from Xen");
        xc_interface_close(xc_handle);
        exit(EXIT_FAILURE);
    }

    xc_interface_close(xc_handle);

    *mfn  = op.u.tbufcontrol.buffer_mfn;
    *size = op.u.tbufcontrol.size;
}

/**
 * map_tbufs - memory map Xen trace buffers into user space
 * @tbufs_mfn: mfn of the trace buffers
 * @num:       number of trace buffers to map
 * @size:      size of each trace buffer
 *
 * Maps @num * @size bytes starting at @tbufs_mfn into the process address
 * space, readable and writable, and returns the address of the mapping.
 * Exits the process if the control interface cannot be opened or the
 * mapping fails.
 */
struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
                        unsigned long size)
{
    int xc_handle;                  /* file descriptor for /proc/xen/privcmd */
    struct t_buf *tbufs_mapped;

    xc_handle = xc_interface_open();

    if ( xc_handle < 0 ) 
    {
        PERROR("Open /proc/xen/privcmd when mapping trace buffers\n");
        exit(EXIT_FAILURE);
    }

    tbufs_mapped = xc_map_foreign_range(xc_handle, DOMID_XEN,
                                        size * num, PROT_READ | PROT_WRITE,
                                        tbufs_mfn);

    if ( tbufs_mapped == NULL ) 
    {
        /* Report before closing the handle.
         * NOTE(review): PERROR presumably prints errno, which the close
         * could otherwise overwrite - confirm against xc_private.h. */
        PERROR("Failed to mmap trace buffers");
        xc_interface_close(xc_handle);
        exit(EXIT_FAILURE);
    }

    xc_interface_close(xc_handle);

    return tbufs_mapped;
}

/**
 * init_bufs_ptrs - initialises an array of pointers to the trace buffers
 * @bufs_mapped:    the userspace address where the trace buffers are mapped
 * @num:            number of trace buffers
 * @size:           trace buffer size
 *
 * Returns a newly allocated array of @num pointers; entry i points @size * i
 * bytes into the mapped region, i.e. at the start of trace buffer i.  The
 * caller owns the array and releases it with free().  Exits the process if
 * the allocation fails.
 */
struct t_buf **init_bufs_ptrs(void *bufs_mapped, unsigned int num,
        unsigned long size)
{
    unsigned int i;              /* unsigned, to match the type of num */
    struct t_buf **user_ptrs;

    user_ptrs = calloc(num, sizeof *user_ptrs);
    if ( user_ptrs == NULL )
    {
        PERROR( "Failed to allocate memory for buffer pointers\n");
        exit(EXIT_FAILURE);
    }

    /* initialise pointers to the trace buffers - given the size of a trace
     * buffer and the value of bufs_mapped, we can easily calculate these */
    for ( i = 0; i < num; i++ )
        user_ptrs[i] = (struct t_buf *)((char *)bufs_mapped + size * i);

    return user_ptrs;
}


/**
 * init_rec_ptrs - initialises data area pointers to locations in user space
 * @meta:          array of user-space pointers to struct t_buf's of metadata
 * @num:           number of trace buffers
 *
 * Returns a newly allocated array of @num pointers; entry i points at the
 * first byte after the struct t_buf that @meta[i] refers to, which is where
 * the records of trace buffer i are read from.  The caller owns the array
 * and releases it with free().  Exits the process if the allocation fails.
 */
struct t_rec **init_rec_ptrs(struct t_buf **meta, unsigned int num)
{
    unsigned int i;              /* unsigned, to match the type of num */
    struct t_rec **data;
    
    data = calloc(num, sizeof *data);
    if ( data == NULL )
    {
        PERROR("Failed to allocate memory for data pointers\n");
        exit(EXIT_FAILURE);
    }

    /* meta[i] + 1 steps over exactly one struct t_buf header */
    for ( i = 0; i < num; i++ )
        data[i] = (struct t_rec *)(meta[i] + 1);

    return data;
}



/**
 * get_num_cpus - get the number of logical CPUs
 *
 * Issues a DOM0_PHYSINFO request and returns the product of the reported
 * threads per core, cores per socket, sockets per node and number of nodes.
 * As a side effect, stores the reported cpu_khz divided by 1000 in
 * opts.cpu_freq.  Exits the process if the control interface cannot be
 * opened or the request fails.
 */
unsigned int get_num_cpus(void)
{
    dom0_op_t op;
    int xc_handle = xc_interface_open();
    int ret;

    /* same check map_tbufs() performs on its handle */
    if ( xc_handle < 0 )
    {
        PERROR("Failure to open the Xen control interface");
        exit(EXIT_FAILURE);
    }

    op.cmd = DOM0_PHYSINFO;
    op.interface_version = DOM0_INTERFACE_VERSION;

    ret = xc_dom0_op(xc_handle, &op);

    if ( ret != 0 )
    {
        PERROR("Failure to get logical CPU count from Xen");
        xc_interface_close(xc_handle);
        exit(EXIT_FAILURE);
    }

    xc_interface_close(xc_handle);
    opts.cpu_freq = (double)op.u.physinfo.cpu_khz/1000.0;

    return (op.u.physinfo.threads_per_core *
            op.u.physinfo.cores_per_socket *
            op.u.physinfo.sockets_per_node *
            op.u.physinfo.nr_nodes);
}


/**
 * monitor_tbufs - monitor the contents of tbufs
 *
 * Determines the number of trace buffers from get_num_cpus(), prepares the
 * per-CPU state (init_current, alloc_qos_data), maps the trace buffers and
 * then polls them until the file-level `interrupted` flag becomes non-zero.
 * Every unread record is handed to process_record() together with the index
 * of the buffer it came from.  Between polling passes the function sleeps
 * for opts.poll_sleep.
 *
 * Returns 0 once the polling loop has been left.
 */
int monitor_tbufs(void)
{
    int i;
    extern void process_record(int, struct t_rec *);
    extern void alloc_qos_data(int ncpu);

    void *tbufs_mapped;          /* pointer to where the tbufs are mapped    */
    struct t_buf **meta;         /* pointers to the trace buffer metadata    */
    struct t_rec **data;         /* pointers to the trace buffer data areas
                                  * where they are mapped into user space.   */
    unsigned long tbufs_mfn;     /* mfn of the tbufs                         */
    unsigned int  num;           /* number of trace buffers / logical CPUS   */
    unsigned long size;          /* size of a single trace buffer            */

    int size_in_recs;            /* records that fit in one buffer after its
                                  * struct t_buf header                      */

    /* get number of logical CPUs (and therefore number of trace buffers) */
    num = get_num_cpus();

    init_current(num);
    alloc_qos_data(num);

    /* opts.cpu_freq was filled in by get_num_cpus() above */
    printf("CPU Frequency = %7.2f\n", opts.cpu_freq);
    
    /* setup access to trace buffers */
    get_tbufs(&tbufs_mfn, &size);

    //    printf("from dom0op: %ld, t_buf: %d, t_rec: %d\n",
    //            size, sizeof(struct t_buf), sizeof(struct t_rec));

    tbufs_mapped = map_tbufs(tbufs_mfn, num, size);

    /* each buffer is a struct t_buf header followed by struct t_rec entries */
    size_in_recs = (size - sizeof(struct t_buf)) / sizeof(struct t_rec);
    //    fprintf(stderr, "size_in_recs = %d\n", size_in_recs);

    /* build arrays of convenience ptrs */
    meta  = init_bufs_ptrs (tbufs_mapped, num, size);
    data  = init_rec_ptrs(meta, num);

    /* now, scan buffers for events */
    while ( !interrupted )
    {
        /* drain every buffer in turn; cons and prod are free-running
         * counters, so cons is reduced modulo size_in_recs to index the
         * record area */
        for ( i = 0; ( i < num ) && !interrupted; i++ )
            while ( meta[i]->cons != meta[i]->prod )
            {
                rmb(); /* read prod, then read item. */
                process_record(i, data[i] + meta[i]->cons % size_in_recs);
                mb(); /* read item, then update cons. */
                meta[i]->cons++;
            }

        nanosleep(&opts.poll_sleep, NULL);
    }

    /* cleanup */
    free(meta);
    free(data);
    /* don't need to munmap - cleanup is automatic */

    return 0;
}


/******************************************************************************
 * Various declarations / definitions GNU argp needs to do its work
 *****************************************************************************/


/*
 * cmd_parser - option callback handed to GNU argp (see the GNU docs)
 *
 * state->input points at the settings_t being filled in.  Numeric options
 * are parsed with strtol() in base 0; when no digits could be consumed the
 * usage message is requested through argp_usage().  The first positional
 * argument becomes the output file name, any further one is a usage error.
 *
 * Returns 0 for a key that was handled, ARGP_ERR_UNKNOWN otherwise.
 */
error_t cmd_parser(int key, char *arg, struct argp_state *state)
{
    settings_t *cfg = (settings_t *)state->input;
    char *end;      /* first character strtol() did not consume */
    long value;

    switch ( key )
    {
        case 't': /* threshold of new records that triggers logging */
            value = strtol(arg, &end, 0);
            cfg->new_data_thresh = value;
            if ( end == arg )
                argp_usage(state);
            break;

        case 's': /* polling sleep time, given in milliseconds */
            value = strtol(arg, &end, 0);
            cfg->poll_sleep = millis_to_timespec(value);
            if ( end == arg )
                argp_usage(state);
            break;

        case 'm': /* milliseconds per sample */
            value = strtol(arg, &end, 0);
            cfg->ms_per_sample = value;
            if ( end == arg )
                argp_usage(state);
            break;

        case ARGP_KEY_ARG: /* positional argument */
            if ( state->arg_num != 0 )
                argp_usage(state);
            else
                cfg->outfile = arg;
            break;

        default:
            return ARGP_ERR_UNKNOWN;
    }

    return 0;
}

#define SHARED_MEM_FILE "/tmp/xenq-shm"
/**
 * alloc_qos_data - create and map the per-CPU shared QoS data areas
 * @ncpu: number of areas to create
 *
 * (Re)creates SHARED_MEM_FILE, grows it by one page-rounded _new_qos_data
 * per CPU and maps each region MAP_SHARED.  Every region is zeroed, has its
 * first datapoint set up through advance_next_datapoint(), and records its
 * page-rounded length (structlen) and the CPU count (ncpu).  The mappings
 * are stored in cpu_qos_data[]; new_qos is NULL again on return.
 *
 * Any allocation, open, write or mmap failure terminates the process.
 */
void alloc_qos_data(int ncpu)
{
    int i, n, pgsize, off=0;
    char *dummy;
    int qos_fd;
    void advance_next_datapoint(uint64_t);

    cpu_qos_data = (_new_qos_data **) calloc(ncpu, sizeof(_new_qos_data *));
    if (cpu_qos_data == NULL) {
        PERROR("Failed to allocate memory for per-cpu qos pointers");
        exit(EXIT_FAILURE);
    }

    qos_fd = open(SHARED_MEM_FILE, O_RDWR|O_CREAT|O_TRUNC, 0777);
    if (qos_fd < 0) {
        PERROR(SHARED_MEM_FILE);
        exit(2);
    }
    pgsize = getpagesize();

    /* zero-filled so that no stale heap contents end up in the file */
    dummy = calloc(1, pgsize);
    if (dummy == NULL) {
        PERROR("Failed to allocate memory for page buffer");
        exit(EXIT_FAILURE);
    }

    for (n=0; n<ncpu; n++) {

      /* extend the file by whole pages until one _new_qos_data fits;
       * afterwards i holds the page-rounded length of the region */
      for (i=0; i<sizeof(_new_qos_data); i=i+pgsize) {
        if (write(qos_fd, dummy, pgsize) != pgsize) {
          PERROR("write to " SHARED_MEM_FILE);
          exit(2);
        }
      }

      new_qos = (_new_qos_data *) mmap(0, sizeof(_new_qos_data), PROT_READ|PROT_WRITE, 
				       MAP_SHARED, qos_fd, off);
      /* mmap reports failure with MAP_FAILED, not with a NULL pointer */
      if (new_qos == MAP_FAILED) {
        PERROR("mmap");
        exit(3);
      }
      off += i;

      memset(new_qos, 0, sizeof(_new_qos_data));
      new_qos->next_datapoint = 0;
      /* operates on the region new_qos currently points at */
      advance_next_datapoint(0);
      new_qos->structlen = i;
      new_qos->ncpu = ncpu;
      cpu_qos_data[n] = new_qos;
    }
    free(dummy);

    /* the mappings keep their own reference to the file */
    close(qos_fd);
    new_qos = NULL;
}


/* Two-level stringification: xstr() lets its argument be macro-expanded
 * before str() turns the result into a string literal, so that the numeric
 * defaults below appear by value in the help text. */
#define xstr(x) str(x)
#define str(x) #x

/* Option table handed to GNU argp through parser_def; the key of each entry
 * is the value cmd_parser() switches on.  The table ends with a zeroed
 * entry. */
const struct argp_option cmd_opts[] =
{
    /* -t / --log-thresh: handled by case 't' in cmd_parser() */
    { .name = "log-thresh", .key='t', .arg="l",
        .doc =
            "Set number, l, of new records required to trigger a write to output "
            "(default " xstr(NEW_DATA_THRESH) ")." },

    /* -s / --poll-sleep: handled by case 's' in cmd_parser() */
    { .name = "poll-sleep", .key='s', .arg="p",
        .doc = 
            "Set sleep time, p, in milliseconds between polling the trace buffer "
            "for new data (default " xstr(POLL_SLEEP_MILLIS) ")." },

    /* -m / --ms_per_sample: handled by case 'm' in cmd_parser().
     * NOTE(review): the two literals below join with a double space. */
    { .name = "ms_per_sample", .key='m', .arg="MS",
        .doc = 
            "Specify the number of milliseconds per sample "
            " (default " xstr(MS_PER_SAMPLE) ")." },

    {0}
};

/* Top-level argp description passed to argp_parse() in main(): ties the
 * option table to the cmd_parser() callback.  In .doc, the text before the
 * vertical tab is printed ahead of the option list in the help output and
 * the text after it follows the options. */
const struct argp parser_def =
{
    .options = cmd_opts,
    .parser = cmd_parser,
    //    .args_doc = "[output file]",
    .doc =
        "Tool to capture and partially process Xen trace buffer data"
        "\v"
        "This tool is used to capture trace buffer data from Xen.  The data is "
        "saved in a shared memory structure to be further processed by xenmon."
};


/* Global hooks read by GNU argp: the version string is what --version
 * prints, the bug address is shown at the end of the --help output. */
const char *argp_program_version     = "xenbaked v1.3";
const char *argp_program_bug_address = "<rob.gardner@hp.com>";


/*
 * main - program entry point
 *
 * Records the start time, loads the default settings into opts, lets argp
 * override them from the command line, installs close_handler for SIGHUP,
 * SIGTERM and SIGINT, and then runs monitor_tbufs().  When monitoring ends
 * the statistics are dumped and the shared area new_qos points at is synced.
 *
 * Returns whatever monitor_tbufs() returned.
 */
int main(int argc, char **argv)
{
    static const int cleanup_signals[] = { SIGHUP, SIGTERM, SIGINT };
    struct sigaction act;
    unsigned int s;
    int ret;

    time(&start_time);

    /* built-in defaults; argp_parse() below may replace them */
    opts.outfile = 0;
    opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
    opts.new_data_thresh = NEW_DATA_THRESH;
    opts.ms_per_sample = MS_PER_SAMPLE;
    opts.cpu_freq = CPU_FREQ;

    argp_parse(&parser_def, argc, argv, 0, 0, &opts);
    fprintf(stderr, "ms_per_sample = %ld\n", opts.ms_per_sample);

    /* ensure that if we get a signal, we'll do cleanup, then exit */
    act.sa_handler = close_handler;
    act.sa_flags = 0;
    sigemptyset(&act.sa_mask);
    for ( s = 0; s < sizeof(cleanup_signals) / sizeof(cleanup_signals[0]); s++ )
        sigaction(cleanup_signals[s], &act, NULL);

    ret = monitor_tbufs();

    dump_stats();
    msync(new_qos, sizeof(_new_qos_data), MS_SYNC);

    return ret;
}

/* Returns the runnable flag recorded for the domain in the area new_qos
 * currently points at. */
int domain_runnable(int domid)
{
    const int slot = ID(domid);

    return new_qos->domain_info[slot].runnable;
}


/*
 * update_blocked_time - fold the time a domain has spent blocked into the
 * current datapoint and restart (or clear) its blocked-time marker
 *
 * A blocked_start_time of 0 means "not being timed".  Otherwise the span
 * from that marker to now, with allowance for the counter having wrapped,
 * is added to ns_blocked of the current datapoint.  The marker is then
 * cleared when the domain is runnable and set to now when it is not.
 */
void update_blocked_time(int domid, uint64_t now)
{
    const int slot = ID(domid);
    uint64_t elapsed;

    if (new_qos->domain_info[slot].blocked_start_time != 0) {
        elapsed = (now < new_qos->domain_info[slot].blocked_start_time)
            ? now + (~0ULL - new_qos->domain_info[slot].blocked_start_time)
            : now - new_qos->domain_info[slot].blocked_start_time;
        new_qos->qdata[new_qos->next_datapoint].ns_blocked[slot] += elapsed;
    }

    new_qos->domain_info[slot].blocked_start_time =
        domain_runnable(slot) ? 0 : now;
}


// Move the area new_qos points at on to its next datapoint: the new index
// is stored first, then every per-domain and per-sample counter of that
// datapoint is cleared and its timestamp is set to 'now'.
void advance_next_datapoint(uint64_t now)
{
    const int prev = new_qos->next_datapoint;
    const int cur = QOS_INCR(prev);
    int dom = 0;

    new_qos->next_datapoint = cur;

    // per-domain counters
    while (dom < NDOMAINS) {
        new_qos->qdata[cur].ns_gotten[dom] = 0;
        new_qos->qdata[cur].ns_allocated[dom] = 0;
        new_qos->qdata[cur].ns_waiting[dom] = 0;
        new_qos->qdata[cur].ns_blocked[dom] = 0;
        new_qos->qdata[cur].switchin_count[dom] = 0;
        new_qos->qdata[cur].io_count[dom] = 0;
        dom++;
    }

    // per-sample counters
    new_qos->qdata[cur].ns_passed = 0;
    new_qos->qdata[cur].lost_records = 0;
    new_qos->qdata[cur].flip_free_periods = 0;

    new_qos->qdata[cur].timestamp = now;
}



// Bring the accounting of one domain up to date as of 'now'.
//
// Updates the domain's last_update_time and, when the domain was runnable
// at its previous update and is current on 'cpu', credits the time since
// start_time to ns_oncpu_since_boot and to ns_gotten of the current
// datapoint, and the time since the last update to ns_since_boot.  It then
// refreshes runnable_at_last_update, the blocked-time accounting, and the
// ns_passed / timestamp fields of the current datapoint.
//
// Returns without changing anything when 'now' lies before the last update
// by at least 'billion' and this does not look like a counter wraparound.
void qos_update_thread(int cpu, int domid, uint64_t now)
{
    int n, id;
    uint64_t last_update_time, start;
    int64_t time_since_update, run_time = 0;

    id = ID(domid);

    n = new_qos->next_datapoint;
    last_update_time = new_qos->domain_info[id].last_update_time;

    // unsigned difference stored in a signed variable: it turns negative
    // when 'now' is earlier than last_update_time
    time_since_update = now - last_update_time;

    if (time_since_update < 0) {
      // what happened here? either a timestamp wraparound, or more likely,
      // a slight inconsistency among timestamps from various cpu's
      if (-time_since_update < billion) {
	// fairly small difference, let's just adjust 'now' to be a little
	// beyond last_update_time
	// NOTE(review): the statement below uses the magnitude of the
	// difference; 'now' itself is not modified
	time_since_update = -time_since_update;
      }
      else if ( ((~0ULL - last_update_time) < billion) && (now < billion) ) {
	// difference is huge, must be a wraparound
	// last_update time should be "near" ~0ULL,
	// and now should be "near" 0
	time_since_update = now + (~0ULL - last_update_time);
	printf("time wraparound\n");
      }
      else {
	// none of the above, may be an out of order record
	// no good solution, just ignore and update again later
	return;
      }
    }
	
    new_qos->domain_info[id].last_update_time = now;

    // only a domain that was runnable and is current on this cpu has been
    // consuming cpu time since start_time
    if (new_qos->domain_info[id].runnable_at_last_update && is_current(domid, cpu)) {
        start = new_qos->domain_info[id].start_time;
        if (start > now) {		// wrapped around
            run_time = now + (~0ULL - start);
	    printf("warning: start > now\n");
        }
        else
            run_time = now - start;
	//	if (run_time < 0)	// should not happen
	//	  printf("warning: run_time < 0; start = %lld now= %lld\n", start, now);
        new_qos->domain_info[id].ns_oncpu_since_boot += run_time;
        new_qos->domain_info[id].start_time = now;
        new_qos->domain_info[id].ns_since_boot += time_since_update;
#if 1
	// credit the run time to the current datapoint as well
	new_qos->qdata[n].ns_gotten[id] += run_time;
	if (domid == 0 && cpu == 1)
	  printf("adding run time for dom0 on cpu1\r\n");
#endif
    }

    new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid);

    update_blocked_time(domid, now);

    // how much time passed since this datapoint was updated?
    if (now >= new_qos->qdata[n].timestamp) {
        // all is right with the world, time is increasing
        new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp);
    }
    else {
        // time wrapped around
        // (ns_passed is left untouched in this case; the accounting
        // below is disabled)
        //new_qos->qdata[n].ns_passed += (now + (~0LL - new_qos->qdata[n].timestamp));
        //    printf("why timewrap?\r\n");
    }
    new_qos->qdata[n].timestamp = now;
}


// called by dump routines to update all structures:
// runs qos_update_thread() for every domain slot that is marked in_use
void qos_update_all(uint64_t now, int cpu)
{
    int slot = 0;

    while (slot < NDOMAINS) {
        if (new_qos->domain_info[slot].in_use)
            qos_update_thread(cpu, slot, now);
        slot++;
    }
}


// Update the accounting for one domain.  Once the current datapoint has
// accumulated more than opts.ms_per_sample milliseconds' worth of ns_passed,
// all in-use domains are updated instead and the next datapoint is started.
void qos_update_thread_stats(int cpu, int domid, uint64_t now)
{
    const int sample_complete =
        new_qos->qdata[new_qos->next_datapoint].ns_passed > (million*opts.ms_per_sample);

    if (!sample_complete) {
        qos_update_thread(cpu, domid, now);
        return;
    }

    qos_update_all(now, cpu);
    advance_next_datapoint(now);
}


// Set up the bookkeeping slot for a domain the first time it is seen.
//
// Does nothing when the slot is already in use.  Otherwise the slot is
// zeroed, stamped with 'now' as its last update time, marked in use and
// given a display name ("Idle Task<cpu>" for the idle domain, "Domain#<id>"
// for any other).  The domain's counters are cleared in every sample.
void qos_init_domain(int cpu, int domid, uint64_t now)
{
    int i, id;

    id = ID(domid);

    if (new_qos->domain_info[id].in_use)
        return;


    memset(&new_qos->domain_info[id], 0, sizeof(_domain_info));
    new_qos->domain_info[id].last_update_time = now;
    new_qos->domain_info[id].runnable_start_time = 0; // invalidate
    new_qos->domain_info[id].in_use = 1;
    new_qos->domain_info[id].blocked_start_time = 0;
    new_qos->domain_info[id].id = id;

    // name is a fixed-size field inside the slot: bound the formatting so
    // that a large number cannot write past its end
    if (domid == IDLE_DOMAIN_ID)
        snprintf(new_qos->domain_info[id].name,
                 sizeof(new_qos->domain_info[id].name), "Idle Task%d", cpu);
    else
        snprintf(new_qos->domain_info[id].name,
                 sizeof(new_qos->domain_info[id].name), "Domain#%d", domid);

    for (i=0; i<NSAMPLES; i++) {
        new_qos->qdata[i].ns_gotten[id] = 0;
        new_qos->qdata[i].ns_allocated[id] = 0;
        new_qos->qdata[i].ns_waiting[id] = 0;
        new_qos->qdata[i].ns_blocked[id] = 0;
        new_qos->qdata[i].switchin_count[id] = 0;
        new_qos->qdata[i].io_count[id] = 0;
    }
}


// called when a new thread gets the cpu
//
// Marks the domain runnable, settles its blocked time, records 'now' as the
// start of its run and adds one switch-in plus the supplied allocation and
// wait times to the current datapoint.  The domain becomes the current one
// on 'cpu' only after its statistics have been updated.
void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc, unsigned long ns_waited)
{
    const int slot = ID(domid);
    int sample;

    new_qos->domain_info[slot].runnable = 1;
    update_blocked_time(domid, now);

    // neither blocked nor waiting to run any more
    new_qos->domain_info[slot].blocked_start_time = 0;
    new_qos->domain_info[slot].runnable_start_time = 0;
    new_qos->domain_info[slot].start_time = now;

    sample = new_qos->next_datapoint;
    new_qos->qdata[sample].switchin_count[slot] += 1;
    new_qos->qdata[sample].ns_allocated[slot] += ns_alloc;
    new_qos->qdata[sample].ns_waiting[slot] += ns_waited;

    qos_update_thread_stats(cpu, domid, now);
    set_current(cpu, slot);

    // restart the page flip count for this period of dom0 execution
    if (slot == 0) {
        dom0_flips = 0;
    }
}

// called when the current thread is taken off the cpu
//
// Warns about suspiciously short runs, adds 'gotten' to the domain's
// ns_oncpu_since_boot, records 'now' as the moment it started waiting to
// run again and updates its statistics.  ns_gotten of the datapoint is not
// touched here; qos_update_thread() accounts for it.  For domain slot 0, a
// period in which no page flip was counted increments flip_free_periods of
// the datapoint that was current on entry.
void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
{
    const int slot = ID(domid);
    int sample;

    if (!is_current(slot, cpu)) {
        // switching out a domain that is not current: diagnostic disabled
    }

    if (gotten == 0)
        printf("gotten==0 in qos_switchout(domid=%d)\n", domid);

    if (gotten < 100)
        printf("gotten<100ns in qos_switchout(domid=%d)\n", domid);

    // remember the datapoint before the update below can advance it
    sample = new_qos->next_datapoint;

    new_qos->domain_info[slot].ns_oncpu_since_boot += gotten;
    new_qos->domain_info[slot].runnable_start_time = now;
    qos_update_thread_stats(cpu, slot, now);

    // process dom0 page flips
    if (slot == 0 && dom0_flips == 0)
        new_qos->qdata[sample].flip_free_periods++;
}

// called when domain is put to sleep, may also be called
// when thread is already asleep (in which case nothing happens)
void qos_state_sleeping(int cpu, int domid, uint64_t now) 
{
    const int slot = ID(domid);

    // only act on a runnable -> sleeping transition
    if (domain_runnable(slot)) {
        new_qos->domain_info[slot].runnable = 0;
        new_qos->domain_info[slot].blocked_start_time = now;   // blocked from now on
        new_qos->domain_info[slot].runnable_start_time = 0;    // invalidate
        qos_update_thread_stats(cpu, domid, now);
    }
}



// Release the bookkeeping slot of a domain by clearing its in_use flag.
void qos_kill_thread(int domid)
{
    const int slot = ID(domid);

    new_qos->domain_info[slot].in_use = 0;
}


// called when thread becomes runnable, may also be called
// when thread is already runnable (in which case nothing happens)
void qos_state_runnable(int cpu, int domid, uint64_t now)
{
    const int slot = ID(domid);

    // only act on a sleeping -> runnable transition
    if (!domain_runnable(slot)) {
        new_qos->domain_info[slot].runnable = 1;
        update_blocked_time(domid, now);

        qos_update_thread_stats(cpu, domid, now);

        new_qos->domain_info[slot].blocked_start_time = 0;      // invalidate
        new_qos->domain_info[slot].runnable_start_time = now;   // waiting to run from now on
    }
}


// Count one I/O event for a domain.
//
// In every per-cpu area where the domain is marked in_use, io_count of that
// area's current datapoint is incremented for the domain.  In addition,
// io_count[0] of the area new_qos points at and the dom0 page flip counter
// are incremented.  'now' is not used.
void qos_count_packets(domid_t domid, uint64_t now)
{
  int i, id = ID(domid);
  _new_qos_data *cpu_data;

  // cpu_qos_data holds only as many entries as alloc_qos_data() created,
  // which may be fewer than NCPU; the count is recorded in each area.
  // NOTE(review): assumes new_qos points at one of those areas - confirm
  for (i=0; i<NCPU && i<new_qos->ncpu; i++) {
    cpu_data = cpu_qos_data[i];
    if (cpu_data == NULL)
      continue;
    if (cpu_data->domain_info[id].in_use) {
      cpu_data->qdata[cpu_data->next_datapoint].io_count[id]++;
    }
  }

  new_qos->qdata[new_qos->next_datapoint].io_count[0]++;
  dom0_flips++;
}


int domain_ok(int cpu, int domid, uint64_t now)