天天看点

Linux中断子系统(二)中断控制器GIC驱动分析Linux中断子系统(二)中断控制器GIC驱动分析

Linux中断子系统(二)中断控制器GIC驱动分析

备注:

  1. Kernel版本:5.4

  2. 使用工具:Source Insight 4.0

  3. 参考博客:

Linux中断子系统(一)中断控制器及驱动分析

吐血整理|肝翻Linux中断所有知识点

linux kernel的中断子系统之(七):GIC代码分析

设备信息添加

ARM平台的设备信息,都是通过Device Tree设备树来添加,下面就是一个中断控制器的设备树信息:

gic: [email protected]{
			compatible = "arm,gic-400", "arm,cortex-a7-gic";
			reg = <0xb0001000 0x1000>, /* GICD */
				  <0xb0002000 0x1000>; /* GICC */
			interrupt-controller;
			#interrupt-cells = <3>;
			interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
		};
           

字段解析:

  • compatible字段:用于与具体的驱动来进行匹配,比如示例中“arm, gic-400”,可以根据这个名字去匹配对应的驱动程序;

  • reg字段:描述中断控制器的地址信息以及地址范围,比如示例中分别制定了GIC Distributor(GICD)和GIC CPU Interface(GICC)的地址信息;

  • interrupt-controller字段:表示该设备是一个中断控制器,外设可以连接在该中断控制器上;

  • interrupt-cells字段:用于指定编码一个中断源所需要的单元个数,这个值为3。比如在外设在设备树中添加中断信号时,通常能看到类似interrupts = <0 23 4>;的信息,第一个单元0,表示的是中断类型(1:PPI,0:SPI),第二个单元23表示的是中断号,第三个单元4表示的是中断触发的类型;

  • interrupts:分别代表中断类型,中断号,中断类型, PPI中断亲和, 保留字段。

注:关于设备数的各个字段含义,详细可以参考Documentation/devicetree/bindings下的对应信息;

设备树的信息,添加到系统中的:

Linux中断子系统(二)中断控制器GIC驱动分析Linux中断子系统(二)中断控制器GIC驱动分析

驱动流程分析

GIC驱动的执行流程如下图所示:

Linux中断子系统(二)中断控制器GIC驱动分析Linux中断子系统(二)中断控制器GIC驱动分析
  • 首先需要了解一下链接脚本vmlinux.lds,脚本中定义了一个__irqchip_of_table段,该段用于存放中断控制器信息,用于最终来匹配设备;
  • 在GIC驱动程序中,使用IRQCHIP_DECLARE宏来声明结构信息,包括compatible字段和回调函数,该宏会将这个结构放置到__irqchip_of_table字段中;
  • 在内核启动初始化中断的函数中,of_irq_init函数会去查找设备节点信息,该函数的传入参数就是__irqchip_of_table段,由于IRQCHIP_DECLARE已经将信息填充好了,of_irq_init函数会根据arm,gic-400去查找对应的设备节点,并获取设备的信息。中断控制器也存在级联的情况,of_irq_init函数中也处理了这种情况;
  • or_irq_init函数中,最终会回调IRQCHIP_DECLARE声明的回调函数,也就是gic_of_init,而这个函数就是GIC驱动的初始化入口函数了;
  • GIC的工作,本质上是由中断信号来驱动,因此驱动本身的工作就是完成各类信息的初始化,注册好相应的回调函数,以便能在信号到来之时去执行;
  • set_smp_process_call设置__smp_cross_call函数指向gic_raise_softirq,本质上就是通过软件来触发GIC的SGI中断,用于核间交互;
  • cpuhp_setup_state_nocalls函数,设置好CPU进行热插拔时GIC的回调函数,以便在CPU热插拔时做相应处理;
  • set_handle_irq函数的设置很关键,它将全局函数指针handle_arch_irq指向了gic_handle_irq,而处理器在进入中断异常时,会跳转到handle_arch_irq执行,所以,可以认为它就是中断处理的入口函数了;
  • 驱动中完成了各类函数的注册,此外还完成了irq_chip, irq_domain等结构体的初始化,这些结构在下文会进一步分析;
  • 最后,完成GIC硬件模块的初始化设置,以及电源管理相关的注册等工作;

    注:以上内容来自博客:https://www.cnblogs.com/LoyenWang/p/12996812.html

irq chip driver的声明

源码:driver/irqchip/irq-gic.c

IRQCHIP_DECLARE(gic_400, "arm,gic-400", gic_of_init);
IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init);
           

  定义 IRQCHIP_DECLARE 之后,相应的内容会保存到 __irqchip_of_table 里边:

#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn)


#define OF_DECLARE_2(table, name, compat, fn) \
		_OF_DECLARE(table, name, compat, fn, of_init_fn_2)
        
#define _OF_DECLARE(table, name, compat, fn, fn_type)			\
	static const struct of_device_id __of_table_##name		\
		__used __section(__##table##_of_table)			\
		__aligned(__alignof__(struct of_device_id))		\
		 = { .compatible = compat,				\
		     .data = (fn == (fn_type)NULL) ? fn : fn  }      
           

  __irqchip_of_table 在链接脚本 vmlinux.lds 里,被放到了 __irqchip_begin 和 __irqchip_of_end 之间,该段用于存放中断控制器信息:

#ifdef CONFIG_IRQCHIP    #define IRQCHIP_OF_MATCH_TABLE()                    \
        . = ALIGN(8);                           \
        VMLINUX_SYMBOL(__irqchip_begin) = .;                \
        *(__irqchip_of_table)                       \
        *(__irqchip_of_end)#endif
           

  在内核启动初始化中断的函数中,of_irq_init 函数会去查找设备节点信息,该函数的传入参数就是 __irqchip_of_table 段,由于 IRQCHIP_DECLARE 已经将信息填充好了,of_irq_init 函数会根据 “arm,gic-400” 去查找对应的设备节点,并获取设备的信息。or_irq_init 函数中,最终会回调 IRQCHIP_DECLARE 声明的回调函数,也就是 gic_of_init,而这个函数就是 GIC 驱动的初始化入口。

gic_of_init函数分析

源码:driver/irqchip/irq-gic.c

int __init
gic_of_init(struct device_node *node, struct device_node *parent)
{
	struct gic_chip_data *gic;
	int irq, ret;

	if (WARN_ON(!node))
		return -ENODEV;

	if (WARN_ON(gic_cnt >= CONFIG_ARM_GIC_MAX_NR))
		return -EINVAL;

	gic = &gic_data[gic_cnt];

	ret = gic_of_setup(gic, node);//映射GICD、GICC地址空间
	if (ret)
		return ret;

	/*
	 * Disable split EOI/Deactivate if either HYP is not available
	 * or the CPU interface is too small.
	 */
	if (gic_cnt == 0 && !gic_check_eoimode(node, &gic->raw_cpu_base))
		static_branch_disable(&supports_deactivate_key);

	//创建irq_domain,初始化distributor、cpu interface等
	ret = __gic_init_bases(gic, &node->fwnode);
	if (ret) {
		gic_teardown(gic);
		return ret;
	}

	if (!gic_cnt) {
		gic_init_physaddr(node);
		gic_of_setup_kvm_info(node);
	}

	//中断映射
	if (parent) {
		irq = irq_of_parse_and_map(node, 0);
		gic_cascade_irq(gic_cnt, irq);
	}

	if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
		gicv2m_init(&node->fwnode, gic_data[gic_cnt].domain);

	gic_cnt++;
	return 0;
}
           

gic_of_setup函数分析:

static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
{
	if (!gic || !node)
		return -EINVAL;

	gic->raw_dist_base = of_iomap(node, 0);//映射GIC distributor的寄存器地址空间
	if (WARN(!gic->raw_dist_base, "unable to map gic dist registers\n"))
		goto error;

	gic->raw_cpu_base = of_iomap(node, 1);//映射GIC cpu interface的寄存器地址空间
	if (WARN(!gic->raw_cpu_base, "unable to map gic cpu registers\n"))
		goto error;

	//SMP时,CPU interface offset
	if (of_property_read_u32(node, "cpu-offset", &gic->percpu_offset))
		gic->percpu_offset = 0;

	return 0;

error:
	gic_teardown(gic);

	return -ENOMEM;
}
           

__gic_init_bases函数分析:

static int __init __gic_init_bases(struct gic_chip_data *gic,
				   struct fwnode_handle *handle)
{
	char *name;
	int i, ret;

	if (WARN_ON(!gic || gic->domain))
		return -EINVAL;

	if (gic == &gic_data[0]) {
		/*
		 * Initialize the CPU interface map to all CPUs.
		 * It will be refined as each CPU probes its ID.
		 * This is only necessary for the primary GIC.
		 */
		for (i = 0; i < NR_GIC_CPU_IF; i++)
			gic_cpu_map[i] = 0xff;
#ifdef CONFIG_SMP
		set_smp_cross_call(gic_raise_softirq);//设置SMP核减交互的回调函数,用于IPI
#endif
		cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
					  "irqchip/arm/gic:starting",
					  gic_starting_cpu, NULL);
		set_handle_irq(gic_handle_irq);//设定相关的irq handler,异常处理的入口
		if (static_branch_likely(&supports_deactivate_key))
			pr_info("GIC: Using split EOI/Deactivate mode\n");
	}

	if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) {
		name = kasprintf(GFP_KERNEL, "GICv2");
		gic_init_chip(gic, NULL, name, true);//初始化gic_chip,如:gic_set_affinity
	} else {
		name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0]));
		gic_init_chip(gic, NULL, name, false);
	}

	ret = gic_init_bases(gic, handle);
	if (ret)
		kfree(name);

	return ret;
}
           

gic_init_bases函数分析:

static int gic_init_bases(struct gic_chip_data *gic,
			  struct fwnode_handle *handle)
{
	int gic_irqs, ret;

	//smp架构时,gic cpu interface是否有影子寄存器
	if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
		/* Frankein-GIC without banked registers... */
		unsigned int cpu;

		gic->dist_base.percpu_base = alloc_percpu(void __iomem *);
		gic->cpu_base.percpu_base = alloc_percpu(void __iomem *);
		if (WARN_ON(!gic->dist_base.percpu_base ||
			    !gic->cpu_base.percpu_base)) {
			ret = -ENOMEM;
			goto error;
		}

		for_each_possible_cpu(cpu) {
			u32 mpidr = cpu_logical_map(cpu);
			u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
			unsigned long offset = gic->percpu_offset * core_id;
			*per_cpu_ptr(gic->dist_base.percpu_base, cpu) =
				gic->raw_dist_base + offset;
			*per_cpu_ptr(gic->cpu_base.percpu_base, cpu) =
				gic->raw_cpu_base + offset;
		}

		gic_set_base_accessor(gic, gic_get_percpu_base);
	} else {
		/* Normal, sane GIC... */
		WARN(gic->percpu_offset,
		     "GIC_NON_BANKED not enabled, ignoring %08x offset!",
		     gic->percpu_offset);
		gic->dist_base.common_base = gic->raw_dist_base;
		gic->cpu_base.common_base = gic->raw_cpu_base;
		gic_set_base_accessor(gic, gic_get_common_base);
	}

	/*
	 * Find out how many interrupts are supported.
	 * The GIC only supports up to 1020 interrupt sources.
	 */
	//获取gic最多支持的中断数
	gic_irqs = readl_relaxed(gic_data_dist_base(gic) + GIC_DIST_CTR) & 0x1f;
	gic_irqs = (gic_irqs + 1) * 32;
	if (gic_irqs > 1020)
		gic_irqs = 1020;
	gic->gic_irqs = gic_irqs;

	//注册一个irq domain的数据结构
	if (handle) {		/* DT/ACPI */
		gic->domain = irq_domain_create_linear(handle, gic_irqs,
						       &gic_irq_domain_hierarchy_ops,
						       gic);
	} else {		/* Legacy support */
		/*
		 * For primary GICs, skip over SGIs.
		 * No secondary GIC support whatsoever.
		 */
		int irq_base;

		gic_irqs -= 16; /* calculate # of irqs to allocate */

		irq_base = irq_alloc_descs(16, 16, gic_irqs,
					   numa_node_id());
		if (irq_base < 0) {
			WARN(1, "Cannot allocate irq_descs @ IRQ16, assuming pre-allocated\n");
			irq_base = 16;
		}

		gic->domain = irq_domain_add_legacy(NULL, gic_irqs, irq_base,
						    16, &gic_irq_domain_ops, gic);
	}

	if (WARN_ON(!gic->domain)) {
		ret = -ENODEV;
		goto error;
	}

	gic_dist_init(gic);		//初始化distributor
	ret = gic_cpu_init(gic);//初始化cpu interface
	if (ret)
		goto error;

	ret = gic_pm_init(gic);//初始化gic 电源管理
	if (ret)
		goto error;

	return 0;

error:
	if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
		free_percpu(gic->dist_base.percpu_base);
		free_percpu(gic->cpu_base.percpu_base);
	}

	return ret;
}
           

gic_dist_init函数分析:

static void gic_dist_init(struct gic_chip_data *gic)
{
	unsigned int i;
	u32 cpumask;
	unsigned int gic_irqs = gic->gic_irqs;
	void __iomem *base = gic_data_dist_base(gic);

	writel_relaxed(GICD_DISABLE, base + GIC_DIST_CTRL);

	/*
	 * Set all global interrupts to this CPU only.
	 */
	cpumask = gic_get_cpumask(gic);//获取当前cpu的mask值,只需获取低8bit
	cpumask |= cpumask << 8;
	cpumask |= cpumask << 16;
	for (i = 32; i < gic_irqs; i += 4)//将所有SPI中断默认配置到当前CPU的cpu interface
		writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4);

	gic_dist_config(base, gic_irqs, NULL);//将所有SPI中断默认配置为低电平触发

	writel_relaxed(GICD_ENABLE, base + GIC_DIST_CTRL);
}
           

gic_cpu_init函数分析:

static int gic_cpu_init(struct gic_chip_data *gic)
{
	void __iomem *dist_base = gic_data_dist_base(gic);
	void __iomem *base = gic_data_cpu_base(gic);
	unsigned int cpu_mask, cpu = smp_processor_id();
	int i;

	/*
	 * Setting up the CPU map is only relevant for the primary GIC
	 * because any nested/secondary GICs do not directly interface
	 * with the CPU(s).
	 */
	if (gic == &gic_data[0]) {
		/*
		 * Get what the GIC says our CPU mask is.
		 */
		if (WARN_ON(cpu >= NR_GIC_CPU_IF))
			return -EINVAL;

		gic_check_cpu_features();
		cpu_mask = gic_get_cpumask(gic);
		gic_cpu_map[cpu] = cpu_mask;

		/*
		 * Clear our mask from the other map entries in case they're
		 * still undefined.
		 */
		for (i = 0; i < NR_GIC_CPU_IF; i++)
			if (i != cpu)
				gic_cpu_map[i] &= ~cpu_mask;
	}

	gic_cpu_config(dist_base, 32, NULL);//清除所有的PPI中断状态,配置PPI、SPI中断的优先级为0xa0

	writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK);
	gic_cpu_if_up(gic);

	return 0;
}
           

数据结构分析

Linux中断子系统(二)中断控制器GIC驱动分析Linux中断子系统(二)中断控制器GIC驱动分析
  • GIC驱动中,使用struct gic_chip_data结构体来描述GIC控制器的信息,整个驱动都是围绕着该结构体的初始化,驱动中将函数指针都初始化好,实际的工作是由中断信号触发,也就是在中断来临的时候去进行回调;
  • struct irq_chip结构,描述的是中断控制器的底层操作函数集,这些函数集最终完成对控制器硬件的操作;
  • struct irq_domain结构,用于硬件中断号和Linux IRQ中断号(virq,虚拟中断号)之间的映射;

gic_data数据结构分析:

struct gic_chip_data {
	struct irq_chip chip;
	union gic_base dist_base;	//GIC Distributor的物理基地址空间
	union gic_base cpu_base;	//GIC CPU interface的物理基地址空间
	void __iomem *raw_dist_base;//GIC Distributor的虚拟基地址空间
	void __iomem *raw_cpu_base; //GIC CPU interface的虚拟基地址空间
	u32 percpu_offset;
#if defined(CONFIG_CPU_PM) || defined(CONFIG_ARM_GIC_PM)//GIC 电源管理相关的成员
	u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
	u32 saved_spi_active[DIV_ROUND_UP(1020, 32)];
	u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
	u32 saved_spi_target[DIV_ROUND_UP(1020, 4)];
	u32 __percpu *saved_ppi_enable;
	u32 __percpu *saved_ppi_active;
	u32 __percpu *saved_ppi_conf;
#endif
	struct irq_domain *domain;	//该GIC对应的irq domain数据结构
	unsigned int gic_irqs;		//GIC支持的IRQ的数目
#ifdef CONFIG_GIC_NON_BANKED
	void __iomem *(*get_base)(union gic_base *);
#endif
};

static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
           

irq_chip数据结构体分析: 用于对中断控制器的硬件操作。

struct irq_chip {
	struct device	*parent_device;		//指向父设备
	const char	*name;					// /proc/interrupts中显示的名字
	unsigned int	(*irq_startup)(struct irq_data *data);//启动中断,如果设置成NULL,则默认为enable
	void		(*irq_shutdown)(struct irq_data *data);	关闭中断,如果设置成NULL,则默认为disable
	void		(*irq_enable)(struct irq_data *data);//中断使能,如果设置成 NULL,则默认为 chip->unmask
	void		(*irq_disable)(struct irq_data *data);//中断禁止

	void		(*irq_ack)(struct irq_data *data);//开始新的中断
	void		(*irq_mask)(struct irq_data *data);//中断源屏蔽
	void		(*irq_mask_ack)(struct irq_data *data);//应答并屏蔽中断
	void		(*irq_unmask)(struct irq_data *data);//解除中断屏蔽
	void		(*irq_eoi)(struct irq_data *data);//中断处理结束后调用

	int		(*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);//在SMP中设置CPU亲和力
	int		(*irq_retrigger)(struct irq_data *data);//重新发送中断到CPU
	int		(*irq_set_type)(struct irq_data *data, unsigned int flow_type);//设置中断触发类型
	int		(*irq_set_wake)(struct irq_data *data, unsigned int on);//使能/禁止电源管理中的唤醒功能

	void		(*irq_bus_lock)(struct irq_data *data);//慢速芯片总线上的锁
	void		(*irq_bus_sync_unlock)(struct irq_data *data); //同步释放慢速总线芯片的锁

	void		(*irq_cpu_online)(struct irq_data *data);
	void		(*irq_cpu_offline)(struct irq_data *data);

	void		(*irq_suspend)(struct irq_data *data);
	void		(*irq_resume)(struct irq_data *data);
	void		(*irq_pm_shutdown)(struct irq_data *data);

	void		(*irq_calc_mask)(struct irq_data *data);

	void		(*irq_print_chip)(struct irq_data *data, struct seq_file *p);
	int		(*irq_request_resources)(struct irq_data *data);
	void		(*irq_release_resources)(struct irq_data *data);

	void		(*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg *msg);
	void		(*irq_write_msi_msg)(struct irq_data *data, struct msi_msg *msg);

	int		(*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state);
	int		(*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state);

	int		(*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info);

	void		(*ipi_send_single)(struct irq_data *data, unsigned int cpu);
	void		(*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest);

	int		(*irq_nmi_setup)(struct irq_data *data);
	void		(*irq_nmi_teardown)(struct irq_data *data);

	unsigned long	flags;
};
           

irq_domain数据结构体分析: 与中断控制器对应,完成硬件中断号 hwirq 到 virq 的映射。

struct irq_domain {
	struct list_head link;	//用于添加到全局链表irq_domain_list中
	const char *name;		//IRQ domain的名字
	const struct irq_domain_ops *ops;//IRQ domain映射操作函数集
	void *host_data;		//在GIC驱动中,指向了irq_gic_data
	unsigned int flags;
	unsigned int mapcount;	//映射中断的个数

	/* Optional data */
	struct fwnode_handle *fwnode;对应中断控制器的 device node
	enum irq_domain_bus_token bus_token;
	struct irq_domain_chip_generic *gc;
#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
	struct irq_domain *parent;//指向父级 irq_domain 的指针,用于支持级联 irq_domain
#endif
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
	struct dentry		*debugfs_file;
#endif

	/* reverse map data. The linear map gets appended to the irq_domain */
	irq_hw_number_t hwirq_max;//IRQ domain支持中断数量的最大值
	unsigned int revmap_direct_max_irq;
	unsigned int revmap_size;//线性映射的大小
	struct radix_tree_root revmap_tree;//Radix Tree映射的根节点
	struct mutex revmap_tree_mutex;
	unsigned int linear_revmap[];//线性映射用到的查找表
};
           

irq_domain_ops数据结构体分析: irq_domain 映射操作函数集

struct irq_domain_ops {
	// 用于中断控制器设备与IRQ domain的匹配
	int (*match)(struct irq_domain *d, struct device_node *node,
		     enum irq_domain_bus_token bus_token);
	int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec,
		      enum irq_domain_bus_token bus_token);

	//用于硬件中断号与Linux中断号的映射
	int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw);
	void (*unmap)(struct irq_domain *d, unsigned int virq);

	//通过device_node,解析硬件中断号和触发方式
	int (*xlate)(struct irq_domain *d, struct device_node *node,
		     const u32 *intspec, unsigned int intsize,
		     unsigned long *out_hwirq, unsigned int *out_type);

#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
	/* extended V2 interfaces to support hierarchy irq_domains */
	int (*alloc)(struct irq_domain *d, unsigned int virq,
		     unsigned int nr_irqs, void *arg);
	void (*free)(struct irq_domain *d, unsigned int virq,
		     unsigned int nr_irqs);
	int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
	void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
	int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
			 unsigned long *out_hwirq, unsigned int *out_type);
#endif
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
	void (*debug_show)(struct seq_file *m, struct irq_domain *d,
			   struct irq_data *irqd, int ind);
#endif
};
           

irq_desc数据结构体分析: 描述一个外设的中断,称之中断描述符

struct irq_desc {
	struct irq_common_data	irq_common_data;
	struct irq_data		irq_data;		//中断控制器的硬件数据
	unsigned int __percpu	*kstat_irqs;
	irq_flow_handler_t	handle_irq;		//中断控制器驱动的处理函数,指向一个 struct irqaction 的链表
#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
	irq_preflow_handler_t	preflow_handler;
#endif
	struct irqaction	*action;	/* IRQ action list *///设备驱动的处理函数
	unsigned int		status_use_accessors;
	unsigned int		core_internal_state__do_not_mess_with_it;
	unsigned int		depth;		/* nested irq disables */
	unsigned int		wake_depth;	/* nested wake enables */
	unsigned int		tot_count;
	unsigned int		irq_count;	/* For detecting broken IRQs */
	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
	unsigned int		irqs_unhandled;
	atomic_t		threads_handled;
	int			threads_handled_last;
	raw_spinlock_t		lock;
	struct cpumask		*percpu_enabled;
	const struct cpumask	*percpu_affinity;
#ifdef CONFIG_SMP
	const struct cpumask	*affinity_hint;
	struct irq_affinity_notify *affinity_notify;
#ifdef CONFIG_GENERIC_PENDING_IRQ
	cpumask_var_t		pending_mask;
#endif
#endif
	unsigned long		threads_oneshot;
	atomic_t		threads_active;
	wait_queue_head_t       wait_for_threads;
#ifdef CONFIG_PM_SLEEP
	unsigned int		nr_actions;
	unsigned int		no_suspend_depth;
	unsigned int		cond_suspend_depth;
	unsigned int		force_resume_depth;
#endif
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry	*dir;
#endif
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
	struct dentry		*debugfs_file;
	const char		*dev_name;
#endif
#ifdef CONFIG_SPARSE_IRQ
	struct rcu_head		rcu;
	struct kobject		kobj;
#endif
	struct mutex		request_mutex;
	int			parent_irq;
	struct module		*owner;
	const char		*name;
} ____cacheline_internodealigned_in_smp;
           

irq_data数据结构体分析: 包含中断控制器的硬件数据

struct irq_data {
	u32			mask;
	unsigned int		irq;	//虚拟中断号
	unsigned long		hwirq;	//硬件中断号
	struct irq_common_data	*common;
	struct irq_chip		*chip;	//对应的irq_chip数据结构
	struct irq_domain	*domain;//对应的irq_domain数据结构
#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
	struct irq_data		*parent_data;
#endif
	void			*chip_data;
};
           
Linux中断子系统(二)中断控制器GIC驱动分析Linux中断子系统(二)中断控制器GIC驱动分析

  上面的结构体 struct irq_desc 是设备驱动加载的过程中完成的,让设备树中的中断能与具体的中断描述符 irq_desc 匹配,其中 struct irqaction 保存着设备的中断处理函数。右边框内的结构体主要是在中断控制器驱动加载的过程中完成的,其中 struct irq_chip 用于对中断控制器的硬件操作,struct irq_domain 用于硬件中断号到 Linux irq 的映射。

注:以上内容出自:

1)吐血整理|肝翻Linux中断所有知识点

2)Linux中断子系统(一)—中断控制器及驱动分析

irq domain浅析

  IRQ domain用于将硬件的中断号,转换成Linux系统中的中断号(virtual irq, virq),如下图:

Linux中断子系统(二)中断控制器GIC驱动分析Linux中断子系统(二)中断控制器GIC驱动分析
  • 每个中断控制器都对应一个IRQ Domain;
  • 中断控制器驱动通过irq_domain_add_*()接口来创建IRQ Domain;
  • IRQ Domain支持三种映射方式:linear map(线性映射),tree map(树映射),no map(不映射);

  1)linear map:维护固定大小的表,索引是硬件中断号,如果硬件中断最大数量固定,并且数值不大,可以选择线性映射;

  2)tree map:硬件中断号可能很大,可以选择树映射;

  3)no map:硬件中断号直接就是Linux的中断号;

三种映射的方式如下图:

Linux中断子系统(二)中断控制器GIC驱动分析Linux中断子系统(二)中断控制器GIC驱动分析
  • 图中描述了三个中断控制器,对应到三种不同的映射方式;
  • 各个控制器的硬件中断号可以一样,最终在Linux内核中映射的中断号是唯一的;

linux内核中,所有的irq domain被挂入一个全局链表,链表头定义如下:

static LIST_HEAD(irq_domain_list);
           

注册irq_domain

线性映射(line map)

  其实就是一个lookup table,HW interrupt ID作为index,通过查表可以获取对应的IRQ number。对于Linear map而言,interrupt controller对其HW interrupt ID进行编码的时候要满足一定的条件:hw ID不能过大,而且ID排列最好是紧密的。对于线性映射,其接口API如下:

/**
 * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain.
 * @of_node: pointer to interrupt controller's device tree node.
 * @size: Number of interrupts in the domain.
 * @ops: map/unmap domain callbacks
 * @host_data: Controller private data pointer
 */
static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
					 unsigned int size,//该irq_domain支持的irq数目
					 const struct irq_domain_ops *ops,//callback函数
					 void *host_data)//driver私有数据
{
	return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data);
}
           

树状映射(tree map)

  建立一个Radix Tree来维护HW interrupt ID到IRQ number映射关系。HW interrupt ID作为lookup key,在Radix Tree检索到IRQ number。如果的确不能满足线性映射的条件,可以考虑Radix Tree map。实际上,内核中使用Radix Tree map的只有powerPC和MIPS的硬件平台。对于Radix Tree map,其接口API如下:

static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
					 const struct irq_domain_ops *ops,
					 void *host_data)
{
	return __irq_domain_add(of_node_to_fwnode(of_node), 0, ~0, 0, ops, host_data);
}
           

no map

  有些中断控制器很强,可以通过寄存器配置HW interrupt ID而不是由物理连接决定的。例如PowerPC 系统使用的MPIC (Multi-Processor Interrupt Controller)。在这种情况下,不需要进行映射,我们直接把IRQ number写入HW interrupt ID配置寄存器就OK了,这时候,生成的HW interrupt ID就是IRQ number,也就不需要进行mapping了。对于这种类型的映射,其接口API如下:

static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
					 unsigned int max_irq,
					 const struct irq_domain_ops *ops,
					 void *host_data)
{
	return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data);
}
           

  这类接口的逻辑很简单,根据自己的映射类型,初始化struct irq_domain中的各个成员,调用__irq_domain_add将该irq domain挂入irq_domain_list的全局列表。

创建irq_domain映射

  上节的内容主要是向系统注册一个irq domain,具体HW interrupt ID和IRQ number的映射关系都是空的,因此,具体各个irq domain如何管理映射所需要的database还是需要建立的。例如:对于线性映射的irq domain,我们需要建立线性映射的lookup table,对于Radix Tree map,我们要把那个反应IRQ number和HW interrupt ID的Radix tree建立起来。创建映射有四个接口函数:

  (1)调用irq_create_mapping函数建立HW interrupt ID和IRQ number的映射关系。该接口函数以irq domain和HW interrupt ID为参数,返回IRQ number(这个IRQ number是动态分配的)。该函数的原型定义如下:

/**
 * irq_create_mapping() - Map a hardware interrupt into linux irq space
 * @domain: domain owning this hardware interrupt or NULL for default domain
 * @hwirq: hardware irq number in that domain space
 *
 * Only one mapping per hardware interrupt is permitted. Returns a linux
 * irq number.
 * If the sense/trigger is to be specified, set_irq_type() should be called
 * on the number returned from that call.
 */
unsigned int irq_create_mapping(struct irq_domain *domain,
		    irq_hw_number_t hwirq);
           

  驱动调用该函数的时候必须提供HW interrupt ID,也就是意味着driver知道自己使用的HW interrupt ID,而一般情况下,HW interrupt ID其实对具体的driver应该是不可见的,不过有些场景比较特殊,例如GPIO类型的中断,它的HW interrupt ID和GPIO有着特定的关系,driver知道自己使用那个GPIO,也就是知道使用哪一个HW interrupt ID了。

  (2)irq_create_strict_mappings。这个接口函数用来为一组HW interrupt ID建立映射。具体函数的原型定义如下:

extern int irq_create_strict_mappings(struct irq_domain *domain,
				      unsigned int irq_base,
				      irq_hw_number_t hwirq_base, int count);
           

  (3)irq_create_of_mapping。看到函数名字中的of(open firmware),我想你也可以猜到了几分,这个接口当然是利用device tree进行映射关系的建立。具体函数的原型定义如下:

extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
           

  通常,一个普通设备的device tree node已经描述了足够的中断信息,在这种情况下,该设备的驱动在初始化的时候可以调用irq_of_parse_and_map这个接口函数进行该device node中和中断相关的内容(interrupts和interrupt-parent属性)进行分析,并建立映射关系,具体代码如下:

/**
 * irq_of_parse_and_map - Parse and map an interrupt into linux virq space
 * @dev: Device node of the device whose interrupt is to be mapped
 * @index: Index of the interrupt to map
 *
 * This function is a wrapper that chains of_irq_parse_one() and
 * irq_create_of_mapping() to make things easier to callers
 */
unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
{
	struct of_phandle_args oirq;

	if (of_irq_parse_one(dev, index, &oirq))    //分析device node中的interrupt相关属性
		return 0;

	return irq_create_of_mapping(&oirq);    //创建映射,并返回对应的IRQ number
}
EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
           

  对于一个使用Device tree的普通驱动程序(我们推荐这样做),基本上初始化需要调用irq_of_parse_and_map获取IRQ number,然后调用request_threaded_irq申请中断handler。

  (4)irq_create_direct_mapping。这是给no map那种类型的interrupt controller使用的,这里不再赘述。

gic创建irq_domain

static int gic_init_bases(struct gic_chip_data *gic,
			  struct fwnode_handle *handle)
{
	int gic_irqs, ret;
    ......
	/*
	 * Find out how many interrupts are supported.
	 * The GIC only supports up to 1020 interrupt sources.
	 */
	//获取gic最多支持的中断数
	gic_irqs = readl_relaxed(gic_data_dist_base(gic) + GIC_DIST_CTR) & 0x1f;
	gic_irqs = (gic_irqs + 1) * 32;
	if (gic_irqs > 1020)
		gic_irqs = 1020;
	gic->gic_irqs = gic_irqs;

	//注册一个irq domain的数据结构
	if (handle) {		/* DT/ACPI */
		gic->domain = irq_domain_create_linear(handle, gic_irqs,
						       &gic_irq_domain_hierarchy_ops,
						       gic);
	} else {		/* Legacy support */
		/*
		 * For primary GICs, skip over SGIs.
		 * No secondary GIC support whatsoever.
		 */
		int irq_base;

		gic_irqs -= 16; /* calculate # of irqs to allocate */

		irq_base = irq_alloc_descs(16, 16, gic_irqs,
					   numa_node_id());
		if (irq_base < 0) {
			WARN(1, "Cannot allocate irq_descs @ IRQ16, assuming pre-allocated\n");
			irq_base = 16;
		}

		gic->domain = irq_domain_add_legacy(NULL, gic_irqs, irq_base,
						    16, &gic_irq_domain_ops, gic);
	}

   ......

	return 0;

error:
	if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
		free_percpu(gic->dist_base.percpu_base);
		free_percpu(gic->cpu_base.percpu_base);
	}

	return ret;
}
           
/**
 * __irq_domain_add() - Allocate a new irq_domain data structure
 * @fwnode: firmware node for the interrupt controller
 * @size: Size of linear map; 0 for radix mapping only
 * @hwirq_max: Maximum number of interrupts supported by controller
 * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
 *              direct mapping
 * @ops: domain callbacks
 * @host_data: Controller private data pointer
 *
 * Allocates and initializes an irq_domain structure.
 * Returns pointer to IRQ domain, or NULL on failure.
 */
struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
				    irq_hw_number_t hwirq_max, int direct_max,
				    const struct irq_domain_ops *ops,
				    void *host_data)
{
	struct device_node *of_node = to_of_node(fwnode);
	struct irqchip_fwid *fwid;
	struct irq_domain *domain;

	static atomic_t unknown_domains;

	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
			      GFP_KERNEL, of_node_to_nid(of_node));
	if (!domain)
		return NULL;

	if (fwnode && is_fwnode_irqchip(fwnode)) {
		fwid = container_of(fwnode, struct irqchip_fwid, fwnode);

		switch (fwid->type) {
		case IRQCHIP_FWNODE_NAMED:
		case IRQCHIP_FWNODE_NAMED_ID:
			domain->fwnode = fwnode;
			domain->name = kstrdup(fwid->name, GFP_KERNEL);
			if (!domain->name) {
				kfree(domain);
				return NULL;
			}
			domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
			break;
		default:
			domain->fwnode = fwnode;
			domain->name = fwid->name;
			break;
		}
#ifdef CONFIG_ACPI
	} else if (is_acpi_device_node(fwnode)) {
		struct acpi_buffer buf = {
			.length = ACPI_ALLOCATE_BUFFER,
		};
		acpi_handle handle;

		handle = acpi_device_handle(to_acpi_device_node(fwnode));
		if (acpi_get_name(handle, ACPI_FULL_PATHNAME, &buf) == AE_OK) {
			domain->name = buf.pointer;
			domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
		}

		domain->fwnode = fwnode;
#endif
	} else if (of_node) {
		char *name;

		/*
		 * DT paths contain '/', which debugfs is legitimately
		 * unhappy about. Replace them with ':', which does
		 * the trick and is not as offensive as '\'...
		 */
		name = kasprintf(GFP_KERNEL, "%pOF", of_node);
		if (!name) {
			kfree(domain);
			return NULL;
		}

		strreplace(name, '/', ':');

		domain->name = name;
		domain->fwnode = fwnode;
		domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
	}

	if (!domain->name) {
		if (fwnode)
			pr_err("Invalid fwnode type for irqdomain\n");
		domain->name = kasprintf(GFP_KERNEL, "unknown-%d",
					 atomic_inc_return(&unknown_domains));
		if (!domain->name) {
			kfree(domain);
			return NULL;
		}
		domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
	}

	of_node_get(of_node);

	/* Fill structure */
	INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
	mutex_init(&domain->revmap_tree_mutex);
	/* 初始化 domain */
	domain->ops = ops;
	domain->host_data = host_data;
	domain->hwirq_max = hwirq_max;
	domain->revmap_size = size;
	domain->revmap_direct_max_irq = direct_max;
	irq_domain_check_hierarchy(domain);

	mutex_lock(&irq_domain_mutex);
	debugfs_add_domain_dir(domain);
	list_add(&domain->link, &irq_domain_list);//将domain添加到irq_domain_list中
	mutex_unlock(&irq_domain_mutex);

	pr_debug("Added domain %s\n", domain->name);
	return domain;
}
           

gic_irq_domain_hierarchy_ops定义:

static const struct irq_domain_ops gic_irq_domain_hierarchy_ops = {
	.translate = gic_irq_domain_translate,
	.alloc = gic_irq_domain_alloc,
	.free = irq_domain_free_irqs_top,
};
           

gic创建irq_chip

static void gic_init_chip(struct gic_chip_data *gic, struct device *dev,
			  const char *name, bool use_eoimode1)
{
	/* Initialize irq_chip */
	gic->chip = gic_chip;
	gic->chip.name = name;
	gic->chip.parent_device = dev;

	if (use_eoimode1) {
		gic->chip.irq_mask = gic_eoimode1_mask_irq;
		gic->chip.irq_eoi = gic_eoimode1_eoi_irq;
		gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity;
	}

#ifdef CONFIG_SMP
	if (gic == &gic_data[0])
		gic->chip.irq_set_affinity = gic_set_affinity;
#endif
}
           

gic_eoimode1_mask_irq分析:

static void gic_eoimode1_mask_irq(struct irq_data *d)
{
	gic_mask_irq(d);
	/*
	 * When masking a forwarded interrupt, make sure it is
	 * deactivated as well.
	 *
	 * This ensures that an interrupt that is getting
	 * disabled/masked will not get "stuck", because there is
	 * noone to deactivate it (guest is being terminated).
	 */
	if (irqd_is_forwarded_to_vcpu(d))
		gic_poke_irq(d, GIC_DIST_ACTIVE_CLEAR);
}
           

gic_eoimode1_eoi_irq分析:

static void gic_eoimode1_eoi_irq(struct irq_data *d)
{
	/* Do not deactivate an IRQ forwarded to a vcpu. */
	if (irqd_is_forwarded_to_vcpu(d))
		return;

	writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
}
           

gic_irq_set_vcpu_affinity分析:

static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	/* Only interrupts on the primary GIC can be forwarded to a vcpu. */
	if (cascading_gic_irq(d))
		return -EINVAL;

	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);
	return 0;
}
           

gic_set_affinity分析:

static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
			    bool force)
{
	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + gic_irq(d);
	unsigned int cpu;

	if (!force)
		cpu = cpumask_any_and(mask_val, cpu_online_mask);
	else
		cpu = cpumask_first(mask_val);

	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
		return -EINVAL;

	writeb_relaxed(gic_cpu_map[cpu], reg);
	irq_data_update_effective_affinity(d, cpumask_of(cpu));

	return IRQ_SET_MASK_OK_DONE;
}
           

继续阅读