Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions paddle/fluid/framework/attribute.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ struct ExtractAttribute<float> {
} else if (attr.type() == typeid(int64_t)) { // NOLINT
int64_t val = PADDLE_GET_CONST(int64_t, attr);
attr = static_cast<float>(val);
} else if (attr.type() == typeid(double)) { // NOLINT
double val = PADDLE_GET_CONST(double, attr);
attr = static_cast<float>(val);
}
float* attr_value = nullptr;
try {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/fused_attention_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@ ir::Graph* FusedAttentionsPass::BackwardHandlerHelper(
fused_attention_grad_op_desc.SetAttr(
"epsilon",
PADDLE_GET_CONST(
float, pre_layer_norm_grad_op_node->Op()->GetAttr("epsilon")));
double, pre_layer_norm_grad_op_node->Op()->GetAttr("epsilon")));
std::vector<int> shape =
PADDLE_GET_CONST(std::vector<int>,
fuse_qkv_reshape_grad_op_node->Op()->GetAttr("shape"));
Expand Down
7 changes: 4 additions & 3 deletions paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ LayerNormFusePass::LayerNormFusePass() {
.IsOptional()
.End()
.AddAttr("epsilon")
.IsNumGE(0.0f)
.IsNumLE(0.001f)
.IsNumGE(0.0)
.IsNumLE(0.001)
.End()
.AddAttr("begin_norm_axis")
.IsNumGT(0)
Expand Down Expand Up @@ -378,7 +378,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
setIntermediateOut(&ln_op_desc, "Mean", scope_name_);
setIntermediateOut(&ln_op_desc, "Variance", scope_name_);
ln_op_desc.SetAttr("begin_norm_axis", begin_norm_axis);
ln_op_desc.SetAttr("epsilon", *(eps_tensor->data<float>()));
ln_op_desc.SetAttr("epsilon",
static_cast<double>(*(eps_tensor->data<float>())));
ln_op_desc.SetAttr("is_test", true);

if (!IsCompat(ln_op_desc)) {
Expand Down
45 changes: 45 additions & 0 deletions paddle/fluid/ir_adaptor/translator/op_translator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,49 @@ struct GroupNormOpTranscriber : public OpTranscriber {
}
};

struct LayerNormOpTranscriber : public OpTranscriber {
  // Translates legacy layer_norm / layer_norm_grad attributes into PIR
  // attributes. The one special case handled here is "epsilon": when the
  // translated attribute arrives as a float it is widened to a
  // pir::DoubleAttribute so the precision matches the PIR op definition.
  pir::AttributeMap TranslateOpAttribute(
      pir::IrContext* ctx,
      const std::string& normalized_op_name,
      const OpAttributeInfoList& op_attr_infos,
      const OpDesc& op_desc) override {
    auto& translator = AttributeTranslator::instance();
    auto& normalizer = OpNameNormalizer::instance();
    pir::AttributeMap result = {};

    for (const auto& info : op_attr_infos) {
      auto src_name = normalizer.GetLegacyAttrName(op_desc.Type(), info.name);
      VLOG(10) << "[op: " << op_desc.Type()
               << "][attr] from: " << src_name << " to: " << info.name;
      if (!op_desc.HasAttr(src_name)) {
        // No legacy attribute to translate; defer to the default handling
        // (which typically fills in the op definition's default value).
        VLOG(10) << "attribute in " << op_desc.Type() << " name: " << src_name
                 << " doesn't exist";
        this->HandleNonexistentAttribute(ctx, &result, info);
        continue;
      }
      paddle::framework::Attribute fluid_attr = op_desc.GetAttr(src_name);
      VLOG(10) << "attribute in " << op_desc.Type() << " name: " << src_name
               << " " << fluid_attr.index();
      pir::Attribute pir_attr = translator(info.type_name, fluid_attr);
      if (src_name == "epsilon" && pir_attr.isa<pir::FloatAttribute>()) {
        // Convert epsilon from float to double for precision alignment.
        pir_attr = pir::DoubleAttribute::get(
            ctx,
            static_cast<double>(
                pir_attr.dyn_cast<pir::FloatAttribute>().data()));
      }
      result[info.name] = pir_attr;
    }

    return result;
  }
};

struct InterpolateOpTranscriber : public OpTranscriber {
pir::AttributeMap TranslateOpAttribute(
pir::IrContext* ctx,
Expand Down Expand Up @@ -4169,6 +4212,8 @@ OpTranslator::OpTranslator() {
special_handlers["leaky_relu_grad"] = LeakyReLUOpTranscriber();
special_handlers["group_norm"] = GroupNormOpTranscriber();
special_handlers["group_norm_grad"] = GroupNormOpTranscriber();
special_handlers["layer_norm"] = LayerNormOpTranscriber();
special_handlers["layer_norm_grad"] = LayerNormOpTranscriber();
special_handlers["bilinear_interp"] = InterpolateOpTranscriber();
special_handlers["bilinear_interp_grad"] = InterpolateOpTranscriber();
special_handlers["nearest_interp"] = InterpolateOpTranscriber();
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/pir/serialize_deserialize/patch/4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,8 @@ op_patches:
object : porder
type : pir::DoubleAttribute
data : 2
- op_name : pd_op.layer_norm
actions:
- action : modify_attr
object : epsilon
type : pir::DoubleAttribute
6 changes: 5 additions & 1 deletion paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,13 @@ class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase {
});
const auto cast_1_op =
res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
const auto &fused_epsilon = res.ComputeAttr(
[](const paddle::drr::MatchContext &match_ctx) -> float {
return static_cast<float>(match_ctx.Attr<double>("epsilon"));
});
const auto &fuse_layer_norm =
res.Op(paddle::dialect::FusedBiasResidualLayernormOp::name(),
{{"epsilon", pat.Attr("epsilon")},
{{"epsilon", fused_epsilon},
{"residual_alpha", res.Float32Attr(1.0)},
{"begin_norm_axis", pat.Attr("begin_norm_axis")},
{"quant_scale", res.Float32Attr(-1.0)},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,14 @@ class Fused2EmbeddingEltwiseLayernormPattern
res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
res.Tensor("casted_scale") = cast_op_2(res.Tensor("scale"));

const auto &fused_epsilon = res.ComputeAttr(
[](const paddle::drr::MatchContext &match_ctx) -> float {
return static_cast<float>(match_ctx.Attr<double>("epsilon"));
});
const auto &fused_embedding_eltwise_layernorm_op =
res.Op(paddle::dialect::FusedEmbeddingEltwiseLayernormOp::name(),
{{
{"epsilon", pat.Attr("epsilon")},
{"epsilon", fused_epsilon},
}});
fused_embedding_eltwise_layernorm_op({&res.Tensor("combine1_out"),
&res.Tensor("combine2_out"),
Expand Down Expand Up @@ -188,10 +192,14 @@ class Fused3EmbeddingEltwiseLayernormPattern
res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
res.Tensor("casted_scale") = cast_op_2(res.Tensor("scale"));

const auto &fused_epsilon = res.ComputeAttr(
[](const paddle::drr::MatchContext &match_ctx) -> float {
return static_cast<float>(match_ctx.Attr<double>("epsilon"));
});
const auto &fused_embedding_eltwise_layernorm_op =
res.Op(paddle::dialect::FusedEmbeddingEltwiseLayernormOp::name(),
{{
{"epsilon", pat.Attr("epsilon")},
{"epsilon", fused_epsilon},
}});
fused_embedding_eltwise_layernorm_op({&res.Tensor("combine1_out"),
&res.Tensor("combine2_out"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,16 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase {
res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
res.Tensor("casted_scale") = cast_op_2(res.Tensor("scale"));

const auto &fused_epsilon = res.ComputeAttr(
[](const paddle::drr::MatchContext &match_ctx) -> float {
return static_cast<float>(match_ctx.Attr<double>("epsilon"));
});
const auto &fused_fc_elementwise_op =
res.Op(paddle::dialect::FusedFcElementwiseLayernormOp::name(),
{{
{"x_num_col_dims", pat.Attr("in_num_col_dims")},
{"activation_type", pat.Attr("activation_type")},
{"epsilon", pat.Attr("epsilon")},
{"epsilon", fused_epsilon},
{"begin_norm_axis", pat.Attr("begin_norm_axis")},
}});
fused_fc_elementwise_op({&res.Tensor("x"),
Expand Down
6 changes: 5 additions & 1 deletion paddle/fluid/pir/transforms/xpu/add_layernorm_fuse_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,13 @@ class AddLayernormPattern : public paddle::drr::DrrPatternBase {

paddle::drr::ResultPattern res = pat.ResultPattern();

const auto &fused_epsilon = res.ComputeAttr(
[](const paddle::drr::MatchContext &match_ctx) -> float {
return static_cast<float>(match_ctx.Attr<double>("epsilon"));
});
const auto &add_layernorm_xpu =
res.Op(paddle::dialect::AddLayernormXpuOp::name(),
{{{"epsilon", pat.Attr("epsilon")},
{{{"epsilon", fused_epsilon},
{"begin_norm_axis", pat.Attr("begin_norm_axis")}}});
add_layernorm_xpu({&res.Tensor("x"),
&res.Tensor("y"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,7 @@ void layer_norm_grad(const Tensor& x,
const Tensor& mean,
const Tensor& variance,
const Tensor& out_grad,
float epsilon,
double epsilon,
int begin_norm_axis,
Tensor* x_grad,
Tensor* scale_grad,
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/primitive/decomp_rule/decomp_rule/composite.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ std::tuple<Tensor, Tensor, Tensor> layer_norm_decomp(
const Tensor& x,
const paddle::optional<Tensor>& scale,
const paddle::optional<Tensor>& bias,
float epsilon,
double epsilon,
int begin_norm_axis) {
std::vector<int64_t> reduce_axis;
auto org_dtype = x.dtype();
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/primitive/decomp_rule/decomp_vjp/details.h
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,7 @@ void layer_norm_grad(const Tensor& x,
const Tensor& mean,
const Tensor& variance,
const Tensor& out_grad,
float epsilon,
double epsilon,
int begin_norm_axis,
Tensor* x_grad,
Tensor* scale_grad,
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/api/ext/op_meta_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ struct KernelFuncImpl<Return (*)(Args...), impl_fn> {
PD_SPECIALIZE_ComputeCallHelper(const bool&);
PD_SPECIALIZE_ComputeCallHelper(const int&);
PD_SPECIALIZE_ComputeCallHelper(const float&);
PD_SPECIALIZE_ComputeCallHelper(const double&);
PD_SPECIALIZE_ComputeCallHelper(const int64_t&);

// NOTE(chenweihang): Used to be compatible with the 2.1 released
Expand Down
17 changes: 17 additions & 0 deletions paddle/phi/core/distributed/auto_parallel/inferspmd_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,23 @@ bool InferSpmdContext::AttrAt(size_t idx) const {
}
}

template <>
double InferSpmdContext::AttrAt(size_t idx) const {
  // Returns the idx-th attribute as a double, transparently widening a
  // float-typed attribute so callers always receive a double. Any other
  // stored type is reported as an InvalidArgument error.
  try {
    const auto& attr = attrs_.at(idx);
    if (attr.type() != typeid(float)) {
      return paddle::get<double>(attr);
    }
    return static_cast<double>(paddle::get<float>(attr));
  } catch (paddle::bad_variant_access const& e) {
    PADDLE_THROW(common::errors::InvalidArgument(
        "Attribute cast error in InferSpmd Context, the input attr type is "
        "`%s`, but the expected attribute type is `double`.",
        attrs_.at(idx).type().name()));
  }
}

template <>
std::vector<int> InferSpmdContext::AttrAt(size_t idx) const {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ struct InferSpmdFnImpl<Return (*)(Args...), infer_spmd_fn> {
PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(bool);
PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(int);
PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(float);
PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(double);
PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(int64_t);
PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(DataType);
PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_CONST_ATTRIBUTE_REF(std::vector<int>);
Expand Down
10 changes: 5 additions & 5 deletions paddle/phi/infermeta/spmd_rules/layer_norm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ using phi::distributed::auto_parallel::str_join;
SpmdInfo LayerNormInferSpmd(const DistMetaTensor& x,
const DistMetaTensor& scale,
const DistMetaTensor& bias,
float epsilon,
double epsilon,
int begin_norm_axis) {
// Step0: verify input args based on layer_norm logic
auto x_shape = common::vectorize(x.dims());
Expand Down Expand Up @@ -154,7 +154,7 @@ SpmdInfo LayerNormInferSpmdReverse(const DistMetaTensor& x,
const DistMetaTensor& out,
const DistMetaTensor& mean,
const DistMetaTensor& variance,
float epsilon,
double epsilon,
int begin_norm_axis) {
// Step0: Verify input args based on layer_norm logic
auto x_shape = common::vectorize(x.dims());
Expand Down Expand Up @@ -298,7 +298,7 @@ SpmdInfo LayerNormGradInferSpmd(const DistMetaTensor& x,
const DistMetaTensor& mean,
const DistMetaTensor& variance,
const DistMetaTensor out_grad,
float epsilon,
double epsilon,
int begin_norm_axis) {
auto get_shape = [](const auto& meta) {
return common::vectorize<int64_t>(meta.dims());
Expand Down Expand Up @@ -451,7 +451,7 @@ SpmdInfo LayerNormGradInferSpmd(const DistMetaTensor& x,
SpmdInfo FastLnInferSpmd(const DistMetaTensor& x,
const DistMetaTensor& scale,
const DistMetaTensor& bias,
float epsilon) {
double epsilon) {
int begin_norm_axis = x.dims().size() - 1;
VLOG(4) << "FastLnInferSpmd call LayerNormInferSpmd with begin_norm_axis="
<< begin_norm_axis;
Expand All @@ -463,7 +463,7 @@ SpmdInfo FastLnGradInferSpmd(const DistMetaTensor& x,
const DistMetaTensor& mean,
const DistMetaTensor& invvar,
const DistMetaTensor& y_grad,
float epsilon) {
double epsilon) {
int begin_norm_axis = x.dims().size() - 1;
const DistMetaTensor& bias(scale); // bias is not used in FastLnGrad
VLOG(4)
Expand Down
10 changes: 5 additions & 5 deletions paddle/phi/infermeta/spmd_rules/layer_norm.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace distributed {
SpmdInfo LayerNormInferSpmd(const DistMetaTensor& x,
const DistMetaTensor& scale,
const DistMetaTensor& bias,
float epsilon,
double epsilon,
int begin_norm_axis);

SpmdInfo LayerNormGradInferSpmd(const DistMetaTensor& x,
Expand All @@ -32,7 +32,7 @@ SpmdInfo LayerNormGradInferSpmd(const DistMetaTensor& x,
const DistMetaTensor& mean,
const DistMetaTensor& variance,
const DistMetaTensor out_grad,
float epsilon = 1e-5,
double epsilon = 1e-5,
int begin_norm_axis = 1);

SpmdInfo LayerNormInferSpmdReverse(const DistMetaTensor& x,
Expand All @@ -41,20 +41,20 @@ SpmdInfo LayerNormInferSpmdReverse(const DistMetaTensor& x,
const DistMetaTensor& out,
const DistMetaTensor& mean,
const DistMetaTensor& variance,
float epsilon,
double epsilon,
int begin_norm_axis);

SpmdInfo FastLnInferSpmd(const DistMetaTensor& x,
const DistMetaTensor& scale,
const DistMetaTensor& bias,
float epsilon);
double epsilon);

SpmdInfo FastLnGradInferSpmd(const DistMetaTensor& x,
const DistMetaTensor& scale,
const DistMetaTensor& mean,
const DistMetaTensor& invvar,
const DistMetaTensor& y_grad,
float epsilon);
double epsilon);

} // namespace distributed
} // namespace phi
2 changes: 1 addition & 1 deletion paddle/phi/infermeta/ternary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1371,7 +1371,7 @@ void GroupNormInferMeta(const MetaTensor& x,
void LayerNormInferMeta(const MetaTensor& x,
const MetaTensor& scale,
const MetaTensor& bias,
float epsilon,
double epsilon,
int begin_norm_axis,
MetaTensor* out,
MetaTensor* mean,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/infermeta/ternary.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ PADDLE_API void GroupNormInferMeta(const MetaTensor& x,
PADDLE_API void LayerNormInferMeta(const MetaTensor& x,
const MetaTensor& scale,
const MetaTensor& bias,
float epsilon,
double epsilon,
int begin_norm_axis,
MetaTensor* out,
MetaTensor* mean,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void LayerNormGradKernel(const Context& dev_ctx,
const DenseTensor& mean,
const DenseTensor& variance,
const DenseTensor& out_grad,
float epsilon,
double epsilon,
int begin_norm_axis,
DenseTensor* x_grad,
DenseTensor* scale_grad,
Expand Down
8 changes: 4 additions & 4 deletions paddle/phi/kernels/cpu/layer_norm_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ namespace phi {
template <typename T, typename Context>
void LayerNormKernel(const Context& dev_ctx,
const DenseTensor& x,
const paddle::optional<DenseTensor>& scale_opt,
const paddle::optional<DenseTensor>& bias_opt,
float epsilon,
const optional<DenseTensor>& scale_opt,
const optional<DenseTensor>& bias_opt,
double epsilon,
int begin_norm_axis,
DenseTensor* y,
DenseTensor* mean,
Expand Down Expand Up @@ -141,7 +141,7 @@ void LayerNormKernel(const Context& dev_ctx,
scale ? scale->data<T>() : nullptr,
bias ? bias->data<T>() : nullptr,
static_cast<int>(left),
static_cast<float>(epsilon),
static_cast<double>(epsilon),
right);
#endif
}
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/funcs/jit/kernel_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ struct LayerNormTuple {
typedef T data_type;
typedef int attr_type;
typedef void (*func_type)(
T*, T*, T*, T*, const T*, const T*, int, const float, int);
T*, T*, T*, T*, const T*, const T*, int, const double, int);
};

// Just for adding to kernel pool without template
Expand Down
Loading
Loading