17 changes: 7 additions & 10 deletions seq2seq/README.md
@@ -1,4 +1,4 @@
- Running the example models in this directory requires PaddlePaddle Fluid 1.7. If your installed PaddlePaddle is older than this, please follow the instructions in the [installation documentation](https://www.paddlepaddle.org.cn/#quick-start) to update it.
+ Running the example models in this directory requires PaddlePaddle 2.0. If your installed PaddlePaddle is older than this, please follow the instructions in the [installation documentation](https://www.paddlepaddle.org.cn/#quick-start) to update it.

# Sequence to Sequence (Seq2Seq)

@@ -12,8 +12,7 @@
├── download.py # data download script
├── train.py # main training script
├── predict.py # main prediction script
- ├── seq2seq_attn.py # translation model with attention
- └── seq2seq_base.py # translation model without attention
+ └── seq2seq_base.py # translation model
```

## Introduction
@@ -35,7 +34,7 @@ Sequence to Sequence (Seq2Seq), using an encoder-decoder (Encoder-Decoder)
git clone https://github.com/PaddlePaddle/hapi
cd hapi
export PYTHONPATH=$PYTHONPATH:`pwd`
- cd examples/seq2seq
+ cd seq2seq
```

## Data Introduction
@@ -157,21 +156,19 @@ python predict.py \
Use the [*multi-bleu.perl*](https://github.com/moses-smt/mosesdecoder.git) tool to evaluate the translation quality of the model's predictions; usage is as follows:

```sh
- mosesdecoder/scripts/generic/multi-bleu.perl tst2013.vi < infer_output.txt
+ perl mosesdecoder/scripts/generic/multi-bleu.perl tst2013.vi < infer_output.txt
```
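If *mosesdecoder* is not already available locally, it can be fetched from the repository linked above (a setup sketch; *multi-bleu.perl* is a standalone Perl script, so no build step is needed):

```sh
git clone https://github.com/moses-smt/mosesdecoder.git
```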

Each model was trained 10 times; each run used the model saved at the 10th epoch for prediction, with beam_size=10. The results are as follows (for easier reading, the 10 results are sorted in ascending order):

```
> no attention
tst2012 BLEU:
[10.75 10.85 10.9 10.94 10.97 11.01 11.01 11.04 11.13 11.4]

tst2013 BLEU:
[10.71 10.71 10.74 10.76 10.91 10.94 11.02 11.16 11.21 11.44]

> with attention
tst2012 BLEU:
[21.14 22.34 22.54 22.65 22.71 22.71 23.08 23.15 23.3 23.4]

tst2013 BLEU:
[23.41 24.79 25.11 25.12 25.19 25.24 25.39 25.61 25.61 25.63]
```
24.94
9 changes: 6 additions & 3 deletions seq2seq/args.py
@@ -36,8 +36,8 @@ def parse_args():
parser.add_argument(
"--attention",
type=eval,
- default=False,
- help="Whether use attention model")
+ default=True,
+ help="Whether use attention in model")

parser.add_argument(
"--optimizer",
@@ -56,11 +56,13 @@
type=int,
default=1,
help="layers number of encoder and decoder")

parser.add_argument(
"--hidden_size",
type=int,
default=100,
help="hidden size of encoder and decoder")

parser.add_argument("--src_vocab_size", type=int, help="source vocab size")
parser.add_argument("--tar_vocab_size", type=int, help="target vocab size")

@@ -105,6 +107,7 @@

parser.add_argument(
"--infer_file", type=str, help="file name for inference")

parser.add_argument(
"--infer_output_file",
type=str,
@@ -120,7 +123,7 @@
help='Whether using gpu [True|False]')

parser.add_argument(
- '--eager_run', type=eval, default=False, help='Whether to use dygraph')
+ '--eager_run', type=eval, default=True, help='Whether to use dygraph')

parser.add_argument(
"--enable_ce",
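A note on the `type=eval` pattern used for the boolean flags above: argparse feeds the raw command-line string through `eval`, so `--attention True` becomes the Python boolean `True`. A minimal standalone sketch of the same pattern (illustrative only, not repo code):

```python
import argparse

# Reproduce the eval-typed boolean flag from args.py (sketch).
parser = argparse.ArgumentParser()
parser.add_argument(
    "--attention",
    type=eval,  # "True"/"False" on the command line -> bool
    default=True,
    help="Whether use attention in model")

args = parser.parse_args(["--attention", "False"])
print(args.attention, type(args.attention))  # False <class 'bool'>
```

Since `eval` will execute any expression it receives, a stricter converter such as `type=lambda s: s == "True"` is a common, safer alternative.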
2 changes: 1 addition & 1 deletion seq2seq/download.py
@@ -47,7 +47,7 @@ def main(arguments):
url = remote_path + '/' + filename
tar_file = os.path.join(tar_path, filename)
URLLIB.urlretrieve(url, tar_file)
print("Downloaded sucess......")
print("Downloaded success......")


if __name__ == '__main__':
19 changes: 19 additions & 0 deletions seq2seq/infer.sh
@@ -0,0 +1,19 @@
export CUDA_VISIBLE_DEVICES="4"
python predict.py \
--attention True \
--src_lang en --tar_lang vi \
--num_layers 2 \
--hidden_size 512 \
--src_vocab_size 17191 \
--tar_vocab_size 7709 \
--batch_size 128 \
--dropout 0.2 \
--init_scale 0.1 \
--max_grad_norm 5.0 \
--vocab_prefix data/en-vi/vocab \
--infer_file data/en-vi/tst2013.en \
--reload_model attention_models/10 \
--infer_output_file infer_output.txt \
--beam_size 10 \
--use_gpu True \
--eager_run True
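A usage sketch for the script above, assuming it is run from the `seq2seq` directory after training has saved `attention_models/10`, and that the downloaded reference translations sit at `data/en-vi/tst2013.vi` (both paths are assumptions inferred from the flags above):

```sh
sh infer.sh
perl mosesdecoder/scripts/generic/multi-bleu.perl data/en-vi/tst2013.vi < infer_output.txt
```

Note that `CUDA_VISIBLE_DEVICES="4"` pins the job to GPU index 4; change the index, or pass `--use_gpu False`, to match the local machine.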
15 changes: 6 additions & 9 deletions seq2seq/predict.py
@@ -20,14 +20,11 @@

import numpy as np
import paddle
- import paddle.fluid as fluid
- from paddle.fluid.layers.utils import flatten
- from paddle.fluid.io import DataLoader
+ from paddle.io import DataLoader
from paddle.static import InputSpec as Input

from args import parse_args
- from seq2seq_base import BaseInferModel
- from seq2seq_attn import AttentionInferModel
+ from seq2seq import Seq2SeqInfer
from reader import Seq2SeqDataset, Seq2SeqBatchSampler, SortType, prepare_infer_input


@@ -50,7 +47,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,

def do_predict(args):
device = paddle.set_device("gpu" if args.use_gpu else "cpu")
- fluid.enable_dygraph(device) if args.eager_run else None
+ paddle.enable_static() if not args.eager_run else None

# define model
inputs = [
@@ -84,14 +81,14 @@ def do_predict(args):
num_workers=0,
return_list=True)

- model_maker = AttentionInferModel if args.attention else BaseInferModel
model = paddle.Model(
- model_maker(
+ Seq2SeqInfer(
args.src_vocab_size,
args.tar_vocab_size,
args.hidden_size,
args.hidden_size,
args.num_layers,
+ args.attention,
args.dropout,
bos_id=bos_id,
eos_id=eos_id,
@@ -109,7 +106,7 @@
# TODO(guosheng): use model.predict when support variant length
with io.open(args.infer_output_file, 'w', encoding='utf-8') as f:
for data in data_loader():
- finished_seq = model.test_batch(inputs=flatten(data))[0]
+ finished_seq = model.test_batch(inputs=list(data))[0]
finished_seq = finished_seq[:, :, np.newaxis] if len(
finished_seq.shape) == 2 else finished_seq
finished_seq = np.transpose(finished_seq, [0, 2, 1])
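The rewritten code path follows the paddle 2.0 high-level API: build a network, wrap it in `paddle.Model` together with `InputSpec`s, optionally load trained weights, and run per-batch inference. A condensed, self-contained sketch of that flow (the tiny network below is a stand-in, not the repo's `Seq2SeqInfer`):

```python
import numpy as np
import paddle
from paddle.static import InputSpec


class TinyInfer(paddle.nn.Layer):
    """Stand-in network with the same (src, src_length) interface (sketch)."""

    def __init__(self, vocab_size=100, hidden_size=8):
        super(TinyInfer, self).__init__()
        self.embed = paddle.nn.Embedding(vocab_size, hidden_size)
        self.proj = paddle.nn.Linear(hidden_size, vocab_size)

    def forward(self, src, src_length):
        # toy "decoding": one output id per source position
        return paddle.argmax(self.proj(self.embed(src)), axis=-1)


paddle.set_device("cpu")  # or "gpu", as --use_gpu selects above
inputs = [
    InputSpec([None, None], "int64", name="src"),
    InputSpec([None], "int64", name="src_length"),
]
model = paddle.Model(TinyInfer(), inputs)
model.prepare()  # no optimizer/loss needed for inference
# model.load("attention_models/10")  # restore trained weights, as in infer.sh

batch = [np.array([[3, 5, 7]], dtype="int64"), np.array([3], dtype="int64")]
finished_seq = model.test_batch(inputs=batch)[0]  # the call predict.py uses
```

(`test_batch` is the method name this repo uses; later paddle releases expose the same operation as `predict_batch`.)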
16 changes: 7 additions & 9 deletions seq2seq/reader.py
@@ -24,9 +24,8 @@
from functools import partial

import numpy as np
- import paddle.fluid as fluid
- from paddle.fluid.dygraph.parallel import ParallelEnv
- from paddle.fluid.io import BatchSampler, DataLoader, Dataset
+ import paddle
+ from paddle.io import BatchSampler, DataLoader, Dataset


def create_data_loader(args, device, for_train=True):
@@ -68,16 +67,15 @@ def create_data_loader(args, device, for_train=True):
num_workers=0,
return_list=True)
data_loaders[i] = data_loader
- return data_loaders
+ return data_loaders, eos_id


def prepare_train_input(insts, bos_id, eos_id, pad_id):
src, src_length = pad_batch_data(
[inst[0] for inst in insts], pad_id=pad_id)
trg, trg_length = pad_batch_data(
[inst[1] for inst in insts], pad_id=pad_id)
- trg_length = trg_length - 1
- return src, src_length, trg[:, :-1], trg_length, trg[:, 1:, np.newaxis]
+ return src, src_length, trg[:, :-1], trg[:, 1:, np.newaxis]
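The new return value keeps the standard teacher-forcing shift: `trg[:, :-1]` is the decoder input, and `trg[:, 1:]`, with a trailing axis added for the loss, is the label one step ahead (the separate `trg_length` is simply no longer returned). A toy illustration with made-up ids (1 = bos, 2 = eos):

```python
import numpy as np

# One target sentence as ids: <bos> A B <eos>  ->  1, 7, 9, 2
trg = np.array([[1, 7, 9, 2]])

dec_input = trg[:, :-1]            # [[1, 7, 9]]       feed <bos> A B
label = trg[:, 1:, np.newaxis]     # [[[7], [9], [2]]] predict A B <eos>
print(dec_input.shape, label.shape)  # (1, 3) (1, 3, 1)
```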


def prepare_infer_input(insts, bos_id, eos_id, pad_id):
@@ -359,9 +357,9 @@ def __init__(self,
self._random.seed(seed)
# for multi-devices
self._distribute_mode = distribute_mode
- self._nranks = ParallelEnv().nranks
- self._local_rank = ParallelEnv().local_rank
- self._device_id = ParallelEnv().dev_id
+ self._nranks = paddle.distributed.ParallelEnv().nranks
+ self._local_rank = paddle.distributed.ParallelEnv().local_rank
+ self._device_id = paddle.distributed.ParallelEnv().dev_id

def __iter__(self):
# global sort or global shuffle
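For context on the `ParallelEnv` fields touched above: in multi-process training, each worker reads the world size (`nranks`) and its own rank (`local_rank`), and keeps only its share of the batches. A simplified sketch of that sharding idea (the repo's sampler additionally sorts by length and handles shuffling):

```python
import paddle
from paddle.io import BatchSampler


class ShardedBatchSampler(BatchSampler):
    """Yield only the batches that belong to this process (sketch)."""

    def __init__(self, dataset, batch_size):
        super(ShardedBatchSampler, self).__init__(
            dataset=dataset, batch_size=batch_size)
        env = paddle.distributed.ParallelEnv()
        self._nranks = env.nranks          # total number of workers
        self._local_rank = env.local_rank  # this worker's index

    def __iter__(self):
        for i, batch in enumerate(super(ShardedBatchSampler, self).__iter__()):
            # round-robin assignment of whole batches to ranks
            if i % self._nranks == self._local_rank:
                yield batch
```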