Skip to content
版 本

asc_copy_l0c2gm

产 品 支 持 情 况

产 品 | 是 否 支 持
Atlas A3 训 练 系 列 产 品/Atlas A3 推 理 系 列 产 品
Atlas A2 训 练 系 列 产 品/Atlas A2 推 理 系 列 产 品

功 能 说 明

矩 阵 计 算 完 成 后,对 结 果 进 行 量 化 处 理,之 后 将 处 理 结 果 搬 运 到GM中。量 化 模 式 共 分 为9种,分 别 为:

  • NoQuant:不 开 启 量 化 功 能。
  • F322F16:float量 化 成half。量 化 结 果 不 支 持INF_NAN模 式。
  • F322BF16:float量 化 成bfloat16_t。量 化 结 果 不 支 持INF_NAN模 式。
  • DEQF16:int32_t量 化 成half。scalar量 化。量 化 结 果 不 支 持INF_NAN模 式。
  • VDEQF16:int32_t量 化 成half。矢 量 量 化。量 化 结 果 不 支 持INF_NAN模 式。
  • QF322B8_PRE:float量 化 成uint8_t/int8_t。scalar量 化。
  • VQF322B8_PRE:float量 化 成uint8_t/int8_t。矢 量 量 化。
  • REQ8:int32_t量 化 成uint8_t/int8_t。scalar量 化。
  • VREQ8:int32_t量 化 成uint8_t/int8_t。矢 量 量 化。

函 数 原 型

  • 常 规 搬 运

    C++
    __aicore__ inline void asc_copy_l0c2gm(__gm__ half* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm(__gm__ bfloat16_t* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm(__gm__ int8_t* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm(__gm__ uint8_t* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm(__gm__ float* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm(__gm__ half* dst, __cc__ int32_t* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm(__gm__ int16_t* dst, __cc__ int32_t* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm(__gm__ int8_t* dst, __cc__ int32_t* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm(__gm__ int32_t* dst, __cc__ int32_t* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    
  • 同 步 搬 运

    C++
    __aicore__ inline void asc_copy_l0c2gm_sync(__gm__ half* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm_sync(__gm__ bfloat16_t* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm_sync(__gm__ int8_t* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm_sync(__gm__ uint8_t* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm_sync(__gm__ float* dst, __cc__ float* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm_sync(__gm__ half* dst, __cc__ int32_t* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm_sync(__gm__ int16_t* dst, __cc__ int32_t* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm_sync(__gm__ int8_t* dst, __cc__ int32_t* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode, uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    __aicore__ inline void asc_copy_l0c2gm_sync(__gm__ int32_t* dst, __cc__ int32_t* src, uint16_t n_size, uint16_t m_size, uint32_t dst_stride_dst_d, uint16_t src_stride, uint8_t unit_flag_mode,  uint64_t quant_pre, uint8_t relu_pre, bool channel_split, bool nz2nd_en)
    

参 数 说 明

参 数 名 | 输 入/输 出 | 描 述
dst输 出目 的 操 作 数(矢 量)的 起 始 地 址。
src输 入源 操 作 数(矢 量)的 起 始 地 址。
n_size输 入源NZ矩 阵 在N方 向 上 的 大 小。
- 不 开 启NZ2ND功 能:若 开 启channel_split功 能,n_size必 须 为8的 倍 数,取 值 范 围 为[1, 4095]。若 不 开 启channel_split功 能,n_size必 须 为16的 倍 数,取 值 范 围 为[1, 4095]。
- 开 启NZ2ND功 能:n_size的 取 值 范 围 为[1, 4095]。
m_size输 入源NZ矩 阵 在M方 向 上 的 大 小。
- 不 开 启NZ2ND功 能:取 值 范 围 为[1, 65535]。
- 开 启NZ2ND功 能,m_size的 取 值 范 围 为[1, 8192]。
dst_stride_dst_d输 入
- 不 开 启NZ2ND功 能,目 的NZ矩 阵 中 相 邻Z排 布 的 起 始 地 址 偏 移,取 值 不 为0,单 位:element。
- 开 启NZ2ND/NZ2DN功 能,目 的ND矩 阵 每 一 行 中 的 元 素 个 数,取 值 不 为0 ,单 位:element。
src_stride输 入源NZ矩 阵 中 相 邻Z排 布 的 起 始 地 址 偏 移,取 值 范 围:[0, 65535],单 位:C0_Size(16*sizeof(T), T为src的 数 据 类 型)。
unit_flag_mode输 入与unit_flag参 数 相 关,取 值 如 下:
0:保 留 值;
2:开 启unit_flag,硬 件 执 行 完 指 令 之 后,不 会 设 置 寄 存 器;
3:开 启unit_flag,硬 件 执 行 完 指 令 后,会 将unit_flag关 闭。
quant_pre输 入量 化 参 数。取 值 见“功 能 说 明”中 列 出 的 量 化 模 式。
relu_pre输 入开 启relu。
channel_split输 入是 否 开 启 通 道 拆 分 的 功 能,默 认false,不 开 启 该 功 能。仅 在src和dst都 为float时 才 能 开 启 通 道 拆 分,且 不 能 同 时 开 启channel_split和NZ2ND功 能。
nz2nd_en输 入开 启nz2nd开 关,false:不 开 启;true:开 启。

返 回 值 说 明

无

流 水 类 型

PIPE_FIX

约 束 说 明

调 用 示 例

C++
// total_length指 参 与 搬 运 的 数 据 总 长 度
constexpr uint64_t total_length = 256; // n_size * m_size = 16 * 16
// dst src分 别 对 应 目 的 操 作 数 的 输 出 地 址 和 源 操 作 数 的 输 入 地 址
__gm__ int32_t dst[total_length];
__cc__ int32_t src[total_length];
// 其 余 入 参 均 以 默 认 数 值 传 入
uint16_t n_size = 16;
uint16_t m_size = 16;
uint32_t dst_stride_dst_d = 256; // 取 值 不 能 为0(见 参 数 说 明),此 处 取m_size * 16
uint16_t src_stride = 8;
uint8_t unit_flag_mode = 0;
uint64_t quant_pre = NoQuant;
uint8_t relu_pre = 0;
bool channel_split = false;
bool nz2nd_en = false;
// 函 数 调 用
asc_copy_l0c2gm_sync(dst, src, n_size, m_size, dst_stride_dst_d, src_stride, unit_flag_mode, quant_pre, relu_pre, channel_split, nz2nd_en);

免 责 声 明:本 站 内 容 由 asc-devkit 仓 master 分 支 自 动 编 译 生 成,属 于 持 续 开 发 版 本,可 能 存 在 缺 陷,仅 供 预 览 与 参 考。如 需 稳 定 及 商 用 资 料,请 查 阅 官 方 昇 腾 社 区