Commit a8710829 by xushangqing

xsq modified

parent e26b16c7
venv
.env
data/train_dataset/cpp-cuda-*
*.err
*.out
*.log
*.tmp
*log
*.patch
*.tok
*.bpe
trained/*
\ No newline at end of file
{
"python.pythonPath": "/lustre/S/xushangqing/anaconda3/bin/python"
}
\ No newline at end of file
......@@ -3,6 +3,12 @@
Pytorch original implementation of TransCoder in [Unsupervised Translation of Programming Languages](https://arxiv.org/pdf/2006.03511.pdf)
![Model](https://dl.fbaipublicfiles.com/transcoder/TransCoder_Schema.jpg)
## XSQ Comment
The data preprocessing script that works on the original BigQuery data is in ./selfmade/filter.py
To avoid network failures during training, multi-bleu.perl (modified in the BLEU calculation part) has been moved into XLM/src/evaluation/
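As a minimal sketch (not the evaluator's exact code), BLEU can then be computed against the local copy of the script; the script path and helper name below are assumptions for illustration:

```python
import subprocess

# Assumed location of the relocated script (adjust to your checkout).
BLEU_SCRIPT = "XLM/src/evaluation/multi-bleu.perl"

def local_bleu(ref_path, hyp_path):
    # Invoke the local Perl script instead of fetching mosesdecoder over the network.
    out = subprocess.run(f"perl {BLEU_SCRIPT} {ref_path} < {hyp_path}",
                         shell=True, capture_output=True, text=True).stdout
    # Expected output: 'BLEU = 12.34, 40.1/20.2/10.3/5.4 (BP=..., ratio=..., ...)'
    return float(out[7:out.index(",")]) if out.startswith("BLEU") else 0.0
```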
## Dependencies
- Python 3
- [NumPy](http://www.numpy.org/)
......
......@@ -226,6 +226,8 @@ class Dataset(object):
if tokens_per_batch == -1:
batches = np.array_split(indices, math.ceil(
len(indices) * 1. / self.batch_size))
# print(batches)
# exit()
else:
batch_ids = np.cumsum(lengths[indices]) // tokens_per_batch
_, bounds = np.unique(batch_ids, return_index=True)
......@@ -292,7 +294,11 @@ class ParallelDataset(Dataset):
sentences_WITH_IDS = sentences
sentences = []
ids_ = []
# print(sentences_WITH_IDS)
# print("sep_index = ", self.sep_index)
#exit()
for s in sentences_WITH_IDS:
# print(np.where(s == self.sep_index))
pos = np.where(s == self.sep_index)[0][0]
sentences.append(s[pos + 1:])
ids_.append(s[:pos])
......@@ -332,6 +338,8 @@ class ParallelDataset(Dataset):
"""
eos = self.eos_index
# check number of sentences
# print("pos1 = ", self.pos1)
# print("pos2 = ", self.pos2)
assert len(self.pos1) == len(self.pos2) > 0
# check sentences indices
assert len(self.pos1) == (self.sent1[self.pos1[:, 1]] == eos).sum()
......@@ -418,6 +426,12 @@ class ParallelDataset(Dataset):
sentence_ids = sentence_ids[:self.max_batch_size]
pos1 = self.pos1[sentence_ids]
pos2 = self.pos2[sentence_ids]
#print(pos1)
#print(pos2)
#print(self.sent1)
#print(len(self.sent1))
#print(self.sent2)
sent1 = self.batch_sentences([self.sent1[a:b] for a, b in pos1])
sent2 = self.batch_sentences([self.sent2[a:b] for a, b in pos2])
yield (sent1, sent2, sentence_ids) if return_indices else (sent1, sent2)
......@@ -448,6 +462,8 @@ class ParallelDataset(Dataset):
if tokens_per_batch == -1:
batches = np.array_split(indices, math.ceil(
len(indices) * 1. / self.batch_size))
# print("tokens_per_batch, batches = " , batches)
# exit()
else:
batch_ids = np.cumsum(lengths[indices]) // tokens_per_batch
_, bounds = np.unique(batch_ids, return_index=True)
......@@ -455,6 +471,8 @@ class ParallelDataset(Dataset):
for i in range(len(bounds) - 1)]
if bounds[-1] < len(indices):
batches.append(indices[bounds[-1]:])
# print("tokens_per_batch, batches = " , batches)
# exit()
# optionally shuffle batches
if shuffle:
......
......@@ -172,11 +172,11 @@ class Dictionary(object):
"""
Index sentences with a dictionary.
"""
if bin_path is not None and os.path.isfile(bin_path):
print("Loading data from %s ..." % bin_path)
data = torch.load(bin_path)
assert dico == data['dico']
return data
# if bin_path is not None and os.path.isfile(bin_path):
# print("Loading data from %s ..." % bin_path)
# data = torch.load(bin_path)
# assert dico == data['dico']
# return data
positions = []
sentences = []
......@@ -231,4 +231,5 @@ class Dictionary(object):
print("Saving the data to %s ..." % bin_path)
torch.save(data, bin_path, pickle_protocol=4)
return data
......@@ -63,16 +63,22 @@ def load_binarized(path, params):
assert path.endswith('.pth')
if params.debug_train:
path = path.replace('train', 'valid')
if getattr(params, 'multi_gpu', False):
# print("## here ## {}".format(getattr(params, 'multi_gpu', False)))
if not getattr(params, 'multi_gpu', False):
# print("here")
assert params.split_data_accross_gpu in ['local', 'global']
# print(" ## here ## %s %i %i" % path, params.local_rank, params.global_rank )
if params.split_data_accross_gpu == 'local':
split_path = '%s.%i.pth' % (path[:-4], params.local_rank)
else:
split_path = '%s.%i.pth' % (path[:-4], params.global_rank)
print(" ## here ## %s %i %i" % (split_path, params.local_rank, params.global_rank) )
if os.path.isfile(split_path):
assert params.split_data is False
path = split_path
else :
print("warning : split path not available {}".format(split_path))
assert os.path.isfile(path), path
logger.info("Loading data from %s ..." % path)
data = torch.load(path)
......@@ -89,6 +95,7 @@ def set_dico_parameters(params, data, dico):
else:
data['dico'] = dico
n_words = len(dico)
print("n_words = ", n_words)
bos_index = dico.index(BOS_WORD)
eos_index = dico.index(EOS_WORD)
pad_index = dico.index(PAD_WORD)
......@@ -205,6 +212,11 @@ def load_para_data(params, data):
tgt_data = load_binarized(tgt_path, params)
# update dictionary parameters
# print("load_parallel_data")
# print(params)
# print(data)
# print(src_data['dico'])
# exit()
set_dico_parameters(params, data, src_data['dico'])
set_dico_parameters(params, data, tgt_data['dico'])
......@@ -310,6 +322,8 @@ def check_data_params(params):
if not os.path.isfile(p):
logger.error(f"{p} not found")
if not params.eval_only:
for paths in params.mono_dataset.values():
print("mono_path = ", paths)
assert all([all([os.path.isfile(p) or os.path.isfile(p.replace('pth', '0.pth'))
for p in paths.values()]) for paths in params.mono_dataset.values()])
......@@ -320,8 +334,10 @@ def check_data_params(params):
[(l2, l3) for _, l2, l3 in params.bt_steps])
params.para_dataset = {
(src, tgt): {
splt: (os.path.join(params.data_path, '%s.%s-%s.%s.pth' % (splt, src, tgt, src)),
os.path.join(params.data_path, '%s.%s-%s.%s.pth' % (splt, src, tgt, tgt)))
splt: (os.path.join(params.data_path, '%s.%s-%s.%s.pth' % (splt,src, tgt, src)),
os.path.join(params.data_path, '%s.%s-%s.%s.pth' % (splt,src, tgt, tgt)))
# splt: (os.path.join(params.data_path, '%s.%s.pth' % (splt, src)),
# os.path.join(params.data_path, '%s.%s.pth' % (splt, tgt)))
for splt in ['train', 'valid', 'test']
if splt != 'train' or (src, tgt) in required_para_train or (tgt, src) in required_para_train
} for src in params.langs for tgt in params.langs
......
......@@ -447,9 +447,9 @@ class EncDecEvaluator(Evaluator):
langs=langs1, causal=False)
enc1 = enc1.transpose(0, 1)
enc1 = enc1.half() if params.fp16 else enc1
if max(len2) > 1024:
print('remove one long sentence')
continue
# if max(len2) > 1024:
# logger.info('remove one long sentence')
# continue
# decode target sentence
dec2 = decoder('fwd', x=x2, lengths=len2, langs=langs2,
causal=True, src_enc=enc1, src_len=len1)
......@@ -480,8 +480,11 @@ class EncDecEvaluator(Evaluator):
lengths, _ = lengths.reshape(-1,
params.number_samples).max(dim=1)
else:
# logger.info("generation path 3")
generated, lengths = decoder.generate(
enc1, len1, lang2_id, max_len=len_v)
# test for
# exit()
# print(f'path 1: {generated.shape}')
else:
......@@ -493,7 +496,9 @@ class EncDecEvaluator(Evaluator):
max_len=len_v
)
# print(f'path 2: {generated.shape}')
hypothesis.extend(convert_to_text(
# exit()
logger.info("dumping hypothesis text")
hypothesis.extend(convert_to_text_for_hyp(
generated, lengths, self.dico, params, generate_several_reps=True))
# compute perplexity and prediction accuracy
......@@ -608,6 +613,48 @@ def convert_to_text(batch, lengths, dico, params, generate_several_reps=False):
else:
return [s[0] for s in sentences]
def convert_to_text_for_hyp(batch, lengths, dico, params, generate_several_reps=False):
"""
Convert a batch of sentences to a list of text sentences.
"""
batch = batch.cpu().numpy()
lengths = lengths.cpu().numpy()
assert len(batch.shape) == 2 or len(
batch.shape) == 3, f'generated batch shape was {batch.shape} while it should be in dimension 2 or 3'
nb_repetitions = 1
if len(batch.shape) == 2:
slen, bs = batch.shape
assert (batch[0] == params.eos_index).sum() == bs
assert (batch == params.eos_index).sum() == 2 * bs
else:
slen, nb_repetitions, bs = batch.shape
assert (batch == params.eos_index).sum() == 2 * bs * nb_repetitions
assert (batch[0] == params.eos_index).sum() == bs * nb_repetitions, print(
f"The values were {(batch[0] == params.eos_index).sum()} and {bs * nb_repetitions}")
assert lengths.max() == slen and lengths.shape[0] == bs, print(
lengths.max(), slen, lengths.shape[0], bs)
sentences = []
for j in range(bs):
sentences.append([])
for rep in range(nb_repetitions):
words = []
for k in range(1, lengths[j]):
next_element = batch[k, j] if len(
batch.shape) == 2 else batch[k, rep, j]
# print("element = ", next_element)
if next_element == params.eos_index:
# print("word unk ")
break
words.append(dico[next_element])
# print("words = ", dico[next_element])
sentences[j].append(" ".join(words))
if generate_several_reps:
return sentences
else:
return [s[0] for s in sentences]
def eval_moses_bleu(ref, hyp):
"""
......@@ -625,4 +672,5 @@ def eval_moses_bleu(ref, hyp):
return float(result[7:result.index(',')])
else:
logger.warning('Impossible to parse BLEU score! "%s"' % result)
return -1
return 0
# return -1
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
use warnings;
use strict;
my $lowercase = 0;
if ($ARGV[0] eq "-lc") {
$lowercase = 1;
shift;
}
my $stem = $ARGV[0];
if (!defined $stem) {
print STDERR "usage: multi-bleu.pl [-lc] reference < hypothesis\n";
print STDERR "Reads the references from reference or reference0, reference1, ...\n";
exit(1);
}
$stem .= ".ref" if !-e $stem && !-e $stem."0" && -e $stem.".ref0";
my @REF;
my $ref=0;
while(-e "$stem$ref") {
&add_to_ref("$stem$ref",\@REF);
$ref++;
}
&add_to_ref($stem,\@REF) if -e $stem;
die("ERROR: could not find reference file $stem") unless scalar @REF;
# add additional references explicitly specified on the command line
shift;
foreach my $stem (@ARGV) {
&add_to_ref($stem,\@REF) if -e $stem;
}
sub add_to_ref {
my ($file,$REF) = @_;
my $s=0;
if ($file =~ /.gz$/) {
open(REF,"gzip -dc $file|") or die "Can't read $file";
} else {
open(REF,$file) or die "Can't read $file";
}
while(<REF>) {
chop;
push @{$$REF[$s++]}, $_;
}
close(REF);
}
my(@CORRECT,@TOTAL,$length_translation,$length_reference);
my $s=0;
while(<STDIN>) {
chop;
$_ = lc if $lowercase;
my @WORD = split;
my %REF_NGRAM = ();
my $length_translation_this_sentence = scalar(@WORD);
my ($closest_diff,$closest_length) = (9999,9999);
foreach my $reference (@{$REF[$s]}) {
# print "$s $_ <=> $reference\n";
$reference = lc($reference) if $lowercase;
my @WORD = split(' ',$reference);
my $length = scalar(@WORD);
my $diff = abs($length_translation_this_sentence-$length);
if ($diff < $closest_diff) {
$closest_diff = $diff;
$closest_length = $length;
# print STDERR "$s: closest diff ".abs($length_translation_this_sentence-$length)." = abs($length_translation_this_sentence-$length), setting len: $closest_length\n";
} elsif ($diff == $closest_diff) {
$closest_length = $length if $length < $closest_length;
# from two references with the same closeness to me
# take the *shorter* into account, not the "first" one.
}
for(my $n=1;$n<=4;$n++) {
my %REF_NGRAM_N = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$REF_NGRAM_N{$ngram}++;
}
foreach my $ngram (keys %REF_NGRAM_N) {
if (!defined($REF_NGRAM{$ngram}) ||
$REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
$REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\n";
}
}
}
}
$length_translation += $length_translation_this_sentence;
$length_reference += $closest_length;
for(my $n=1;$n<=4;$n++) {
my %T_NGRAM = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$T_NGRAM{$ngram}++;
}
foreach my $ngram (keys %T_NGRAM) {
$ngram =~ /^(\d+) /;
my $n = $1;
# my $corr = 0;
# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
$TOTAL[$n] += $T_NGRAM{$ngram};
if (defined($REF_NGRAM{$ngram})) {
if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
$CORRECT[$n] += $T_NGRAM{$ngram};
# $corr = $T_NGRAM{$ngram};
# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
}
else {
$CORRECT[$n] += $REF_NGRAM{$ngram};
# $corr = $REF_NGRAM{$ngram};
# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
}
}
# $REF_NGRAM{$ngram} = 0 if !defined $REF_NGRAM{$ngram};
# print STDERR "$ngram: {$s, $REF_NGRAM{$ngram}, $T_NGRAM{$ngram}, $corr}\n"
}
}
$s++;
}
my $brevity_penalty = 1;
my $bleu = 0;
my @bleu=();
for(my $n=1;$n<=4;$n++) {
if (defined ($TOTAL[$n])){
$bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0;
# print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n";
}else{
$bleu[$n]=0;
}
}
if ($length_reference==0){
printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
exit(1);
}
if ($length_translation<$length_reference) {
$brevity_penalty = exp(1-$length_reference/$length_translation);
}
$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
my_log( $bleu[2] ) +
my_log( $bleu[3] ) +
my_log( $bleu[4] ) ) / 4) ;
printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n",
100*$bleu[1],
100*$bleu[1],
100*$bleu[2],
100*$bleu[3],
100*$bleu[4],
$brevity_penalty,
$length_translation / $length_reference,
$length_translation,
$length_reference;
# print STDERR "It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.\n";
sub my_log {
return -9999999999 unless $_[0];
return log($_[0]);
}
......@@ -79,6 +79,10 @@ def check_model_params(params):
else:
s = params.reload_model.split(',')
assert len(s) == 2
print(s)
for x in s:
print(x)
print(os.path.isfile(x))
assert all([x == '' or os.path.isfile(x) for x in s])
assert not (params.beam_size > 1 and params.number_samples >
......@@ -181,6 +185,8 @@ def build_model(params, dico):
enc_path, map_location=lambda storage, loc: storage.cuda(params.local_rank))
enc_reload = enc_reload['model' if 'model' in enc_reload else 'encoder']
if all([k.startswith('module.') for k in enc_reload.keys()]):
for k,v in enc_reload.items():
print("name = ", k)
enc_reload = {k[len('module.'):]: v for k,
v in enc_reload.items()}
......
......@@ -40,6 +40,9 @@ logger = getLogger()
def Embedding(num_embeddings, embedding_dim, padding_idx=None):
print("fuck,", num_embeddings)
print("fuck,", embedding_dim)
print("fuck,", padding_idx)
m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5)
if padding_idx is not None:
......@@ -459,6 +462,11 @@ class TransformerModel(nn.Module):
- (lang_id1, lang_id2) if two languages are involved (MT)
"""
saved_args = locals()
# print("saved_args = ", saved_args)
# print("src_enc.shape: ", src_enc.shape)
# print("src_len.shape: ", src_len.shape)
if isinstance(max_len, int):
max_lengths = src_len.clone().fill_(max_len)
global_max_len = max_len
......@@ -472,6 +480,7 @@ class TransformerModel(nn.Module):
# generated sentences
generated = src_len.new(global_max_len, bs) # upcoming output
# print("generated = ", generated.shape)
generated.fill_(self.pad_index) # fill upcoming ouput with <PAD>
# we use <EOS> for <BOS> everywhere
generated[0].fill_(self.eos_index)
......@@ -480,19 +489,28 @@ class TransformerModel(nn.Module):
positions = src_len.new(global_max_len).long()
positions = torch.arange(global_max_len, out=positions).unsqueeze(
1).expand(global_max_len, bs)
# print("positions = ", positions)
# print("positions = ", positions.shape)
# language IDs
langs = src_len.new(global_max_len).long().fill_(tgt_lang_id)
langs = langs.unsqueeze(1).expand(global_max_len, bs)
# print("langs = ", langs)
# print("langs = ", langs.shape)
# current position / max lengths / length of generated sentences / unfinished sentences
cur_len = 1
gen_len = src_len.clone().fill_(1)
unfinished_sents = src_len.clone().fill_(1)
# print("gen_len = ", gen_len)
# print("unfinished_sents = ", unfinished_sents)
# cache compute states
self.cache = {'slen': 0}
previous_unfinished_mask = unfinished_sents.ne(0)
# print("previous_unfinished_mask = ", previous_unfinished_mask)
# logger.info("cur_len = ", cur_len)
# logger.info("global_max_len = ", global_max_len)
while cur_len < global_max_len:
# compute word scores
unfinished_mask = unfinished_sents.ne(0)
......@@ -521,7 +539,11 @@ class TransformerModel(nn.Module):
assert tensor.size() == (1, unfinished_mask.sum().item(), self.dim), (cur_len,
global_max_len, src_enc.size(), tensor.size(), (1, bs, self.dim))
tensor = tensor.data[-1, :, :].type_as(src_enc) # (bs, dim)
# print("tensor = ", tensor)
# print("tensor = ", tensor.shape)
scores = self.pred_layer.get_scores(tensor) # (bs, n_words)
# print("scores = ", scores)
# print("scores = ", scores.shape)
# select next words: sample or greedy
if sample_temperature is None:
......@@ -530,6 +552,8 @@ class TransformerModel(nn.Module):
next_words = torch.multinomial(
F.softmax(scores.float() / sample_temperature, dim=1), 1).squeeze(1)
assert next_words.size() == (unfinished_mask.sum().item(),)
# print("next_words = ", next_words)
# print("next_words = ", next_words.shape)
# update generations / lengths / finished sentences / current length.
# No need to updates the finished sequences since the value is self.pad_index by default
......@@ -546,6 +570,7 @@ class TransformerModel(nn.Module):
previous_unfinished_mask = unfinished_mask
# stop when there is a </s> in each sentence, or if we exceed the maximal length
if unfinished_sents.max() == 0:
logger.info("fuking break")
break
# sanity check
......
......@@ -54,7 +54,8 @@ def init_distributed_mode(params):
- global_rank
- world_size
"""
params.is_slurm_job = 'SLURM_JOB_ID' in os.environ and not params.debug_slurm
# params.is_slurm_job = 'SLURM_JOB_ID' in os.environ and not params.debug_slurm
params.is_slurm_job = False
print("SLURM job: %s" % str(params.is_slurm_job))
# SLURM job
......
......@@ -359,7 +359,7 @@ class Trainer(object):
x2 = x.clone()
for i in range(l.size(0)):
# generate a random permutation
scores = np.arange(l[i] - 1) + noise[:l[i] - 1, i]
scores = np.arange(int(l[i] - 1)) + noise[:l[i] - 1, i]
permutation = scores.argsort()
# shuffle words
x2[:l[i] - 1, i].copy_(x2[:l[i] - 1, i]
......
......@@ -482,6 +482,7 @@ def set_sampling_probs(data, params):
# monolingual data
params.mono_list = [
k for k, v in data['mono_stream'].items() if 'train' in v]
print("params.mono_list = ", params.mono_list)
if len(params.mono_list) > 0:
probs = np.array([1.0 * len(data['mono_stream'][lang]['train'])
for lang in params.mono_list])
......@@ -492,6 +493,7 @@ def set_sampling_probs(data, params):
# parallel data
params.para_list = [k for k, v in data['para'].items() if 'train' in v]
print("params.para_list = ", params.para_list)
if len(params.para_list) > 0:
probs = np.array([1.0 * len(data['para'][(l1, l2)]['train'])
for (l1, l2) in params.para_list])
......
fastBPE @ 036711f8
Subproject commit 036711f8fdc3265d64e8e123a0761be12c5a8e74
......@@ -101,7 +101,7 @@ def get_parser():
help="Maximum vocabulary size (-1 to disable)")
parser.add_argument("--min_count", type=int, default=0,
help="Minimum vocabulary count")
parser.add_argument("--lg_sampling_factor", type=float, default=-1,
parser.add_argument("--lg_sampling_factor", type=float, default=0.1,
help="Language sampling factor")
parser.add_argument("--has_sentences_ids", type=bool_flag, default=False,
help="Parallel sentences has an id or not in parallel datasets.")
......
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
use warnings;
use strict;
my $lowercase = 0;
if ($ARGV[0] eq "-lc") {
$lowercase = 1;
shift;
}
my $stem = $ARGV[0];
if (!defined $stem) {
print STDERR "usage: multi-bleu.pl [-lc] reference < hypothesis\n";
print STDERR "Reads the references from reference or reference0, reference1, ...\n";
exit(1);
}
$stem .= ".ref" if !-e $stem && !-e $stem."0" && -e $stem.".ref0";
my @REF;
my $ref=0;
while(-e "$stem$ref") {
&add_to_ref("$stem$ref",\@REF);
$ref++;
}
&add_to_ref($stem,\@REF) if -e $stem;
die("ERROR: could not find reference file $stem") unless scalar @REF;
# add additional references explicitly specified on the command line
shift;
foreach my $stem (@ARGV) {
&add_to_ref($stem,\@REF) if -e $stem;
}
sub add_to_ref {
my ($file,$REF) = @_;
my $s=0;
if ($file =~ /.gz$/) {
open(REF,"gzip -dc $file|") or die "Can't read $file";
} else {
open(REF,$file) or die "Can't read $file";
}
while(<REF>) {
chop;
push @{$$REF[$s++]}, $_;
}
close(REF);
}
my(@CORRECT,@TOTAL,$length_translation,$length_reference);
my $s=0;
while(<STDIN>) {
chop;
$_ = lc if $lowercase;
my @WORD = split;
my %REF_NGRAM = ();
my $length_translation_this_sentence = scalar(@WORD);
my ($closest_diff,$closest_length) = (9999,9999);
foreach my $reference (@{$REF[$s]}) {
# print "$s $_ <=> $reference\n";
$reference = lc($reference) if $lowercase;
my @WORD = split(' ',$reference);
my $length = scalar(@WORD);
my $diff = abs($length_translation_this_sentence-$length);
if ($diff < $closest_diff) {
$closest_diff = $diff;
$closest_length = $length;
# print STDERR "$s: closest diff ".abs($length_translation_this_sentence-$length)." = abs($length_translation_this_sentence-$length), setting len: $closest_length\n";
} elsif ($diff == $closest_diff) {
$closest_length = $length if $length < $closest_length;
# from two references with the same closeness to me
# take the *shorter* into account, not the "first" one.
}
for(my $n=1;$n<=4;$n++) {
my %REF_NGRAM_N = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$REF_NGRAM_N{$ngram}++;
}
foreach my $ngram (keys %REF_NGRAM_N) {
if (!defined($REF_NGRAM{$ngram}) ||
$REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
$REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\n";
}
}
}
}
$length_translation += $length_translation_this_sentence;
$length_reference += $closest_length;
for(my $n=1;$n<=4;$n++) {
my %T_NGRAM = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$T_NGRAM{$ngram}++;
}
foreach my $ngram (keys %T_NGRAM) {
$ngram =~ /^(\d+) /;
my $n = $1;
# my $corr = 0;
# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
$TOTAL[$n] += $T_NGRAM{$ngram};
if (defined($REF_NGRAM{$ngram})) {
if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
$CORRECT[$n] += $T_NGRAM{$ngram};
# $corr = $T_NGRAM{$ngram};
# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
}
else {
$CORRECT[$n] += $REF_NGRAM{$ngram};
# $corr = $REF_NGRAM{$ngram};
# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
}
}
# $REF_NGRAM{$ngram} = 0 if !defined $REF_NGRAM{$ngram};
# print STDERR "$ngram: {$s, $REF_NGRAM{$ngram}, $T_NGRAM{$ngram}, $corr}\n"
}
}
$s++;
}
my $brevity_penalty = 1;
my $bleu = 0;
my @bleu=();
for(my $n=1;$n<=4;$n++) {
if (defined ($TOTAL[$n])){
$bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0;
# print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n";
}else{
$bleu[$n]=0;
}
}
if ($length_reference==0){
printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
exit(1);
}
if ($length_translation<$length_reference) {
$brevity_penalty = exp(1-$length_reference/$length_translation);
}
$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
my_log( $bleu[2] ) +
my_log( $bleu[3] ) +
my_log( $bleu[4] ) ) / 4) ;
printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n",
100*$bleu,
100*$bleu[1],
100*$bleu[2],
100*$bleu[3],
100*$bleu[4],
$brevity_penalty,
$length_translation / $length_reference,
$length_translation,
$length_reference;
# print STDERR "It is in-advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.\n";
sub my_log {
return -9999999999 unless $_[0];
return log($_[0]);
}
from matplotlib import pyplot as plt
import json
from math import log
def data_readin():
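# Collect the JSON payload after each '__log__:' marker in ./pretrain_log and ./train_log
# into {metric_name: [value per epoch]} dicts.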
lis=None
with open('./pretrain_log', 'r') as f:
for line in f.readlines():
i=json.loads(line.split('__log__:')[1])
if not lis:
lis={}
for k in i.keys():
lis[k]=[]
for k in lis.keys():
lis[k].append(i[k])
pretrain=lis.copy()
lis=None
with open('./train_log', 'r') as f:
for line in f.readlines():
i=json.loads(line.split('__log__:')[1])
if not lis:
lis={}
for k in i.keys():
lis[k]=[]
for k in lis.keys():
lis[k].append(i[k])
return pretrain, lis
# 'epoch', 'valid_cpp_mlm_ppl', 'valid_cpp_mlm_acc', 'valid_cuda_mlm_ppl', 'valid_cuda_mlm_acc', 'valid_mlm_ppl', 'valid_mlm_acc', 'test_cpp_mlm_ppl', 'test_cpp_mlm_acc', 'test_cuda_mlm_ppl', 'test_cuda_mlm_acc', 'test_mlm_ppl', 'test_mlm_acc'
# 'epoch', 'valid_cpp_sa-cuda_sa_mt_ppl', 'valid_cpp_sa-cuda_sa_mt_acc', 'valid_cpp_sa-cuda_sa_mt_bleu', 'test_cpp_sa-cuda_sa_mt_ppl', 'test_cpp_sa-cuda_sa_mt_acc', 'test_cpp_sa-cuda_sa_mt_bleu'
if __name__ == "__main__":
pretrain, train =data_readin()
fig=plt.figure(figsize=(10, 5))
plt.autoscale()
ax1=fig.add_subplot(1, 1, 1)
ax2=ax1.twinx()
ind1=0
ind2=0
for k in pretrain.keys():
# print(k)
if k=='epoch':
continue
y=pretrain[k]
if 'ppl' in k:
y=[10*log(i) for i in y]
if 'ppl' in k:
ax1.plot(y, marker='o', color='C%d'%ind1, label=k)
ind1+=1
else:
ax2.plot(y, marker='*', color='C%d'%ind2, linestyle='-.', label=k)
ind2+=1
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')
plt.savefig('./pic/tmp.png')
\ No newline at end of file
......@@ -20,29 +20,52 @@ def check_files_and_symlink_for_XLM(dataset, langs):
print("check that all files exist...")
suffixs = {"": "", ".functions_standalone": "_sa"}
for lang in langs:
for cat in ["", ".functions_standalone"]:
for cat in [".functions_standalone"]:
for i in range(8):
assert dataset.folder.joinpath(
f"{lang}.train{dataset.suffix}.{i}{cat}.bpe.pth").is_file()
assert dataset.folder.joinpath(
f"{lang}.test{dataset.suffix}{cat}.bpe.pth").is_file()
f"{lang}.train{dataset.suffix}{cat}.bpe.pth").is_file(), "{0} file error".format(f"{lang}.train{dataset.suffix}{cat}.bpe.pth")
assert dataset.folder.joinpath(
f"{lang}.test{dataset.suffix}{cat}.bpe.pth").is_file(), "{0} file error".format(f"{lang}.test{dataset.suffix}{cat}.bpe.pth")
assert dataset.folder.joinpath(
f"{lang}.valid{dataset.suffix}{cat}.bpe.pth").is_file()
XLM_folder = Path(str(dataset.folder)+'.XLM-syml')
XLM_folder.mkdir(exist_ok=True)
print("create symlinks for XLM ...")
for lang in langs:
for cat in ["", ".functions_standalone"]:
for cat in [".functions_standalone"]:
for i in range(8):
create_symlink(dataset.folder.joinpath(f"{lang}.train{dataset.suffix}.{i}{cat}.bpe.pth"),
XLM_folder.joinpath(f"train.{lang}{suffixs[cat]}.{i}.pth"))
create_symlink(dataset.folder.joinpath(f"{lang}.train{dataset.suffix}{cat}.bpe.pth"),
XLM_folder.joinpath(f"train.{lang}{suffixs[cat]}.pth"))
create_symlink(dataset.folder.joinpath(f"{lang}.test{dataset.suffix}{cat}.bpe.pth"),
XLM_folder.joinpath(f"test.{lang}{suffixs[cat]}.pth"))
create_symlink(dataset.folder.joinpath(f"{lang}.valid{dataset.suffix}{cat}.bpe.pth"),
XLM_folder.joinpath(f"valid.{lang}{suffixs[cat]}.pth"))
def preprocess(root, lang1, lang2, keep_comments, local, lang3=None, test_size=1000, ncodes=100000, size_gb=50):
subprocess.run(f"cp {XLM_folder.joinpath(f'train.{lang}_sa.pth')} {XLM_folder.joinpath(f'train.{lang}.pth')}", shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
subprocess.run(f"cp {XLM_folder.joinpath(f'test.{lang}_sa.pth')} {XLM_folder.joinpath(f'test.{lang}.pth')}", shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
subprocess.run(f"cp {XLM_folder.joinpath(f'valid.{lang}_sa.pth')} {XLM_folder.joinpath(f'valid.{lang}.pth')}", shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
create_symlink(dataset.folder.joinpath(f"cuda.train.cpp_sa-cuda_sa.cuda_sa.bpe.pth"),
XLM_folder.joinpath(f"train.cpp_sa-cuda_sa.cuda_sa.pth"))
create_symlink(dataset.folder.joinpath(f"cuda.test.cpp_sa-cuda_sa.cuda_sa.bpe.pth"),
XLM_folder.joinpath(f"test.cpp_sa-cuda_sa.cuda_sa.pth"))
create_symlink(dataset.folder.joinpath(f"cuda.valid.cpp_sa-cuda_sa.cuda_sa.bpe.pth"),
XLM_folder.joinpath(f"valid.cpp_sa-cuda_sa.cuda_sa.pth"))
create_symlink(dataset.folder.joinpath(f"cpp.train.cpp_sa-cuda_sa.cpp_sa.bpe.pth"),
XLM_folder.joinpath(f"train.cpp_sa-cuda_sa.cpp_sa.pth"))
create_symlink(dataset.folder.joinpath(f"cpp.test.cpp_sa-cuda_sa.cpp_sa.bpe.pth"),
XLM_folder.joinpath(f"test.cpp_sa-cuda_sa.cpp_sa.pth"))
create_symlink(dataset.folder.joinpath(f"cpp.valid.cpp_sa-cuda_sa.cpp_sa.bpe.pth"),
XLM_folder.joinpath(f"valid.cpp_sa-cuda_sa.cpp_sa.pth"))
def preprocess(root, lang1, lang2, keep_comments, local, lang3=None, test_size=1000, ncodes=10000, size_gb=50):
if size_gb < 1:
size_gb = None
dataset = Dataset(root, lang1, lang2, keep_comments,
......@@ -66,15 +89,19 @@ def preprocess(root, lang1, lang2, keep_comments, local, lang3=None, test_size=1
dataset.train_bpe(ncodes=ncodes, size_gb=size_gb)
dataset.apply_bpe(
f'train{dataset.suffix}.[01234567].tok', use_vocab=False, executor=cluster_ex2)
dataset.apply_bpe(f'test{dataset.suffix}.tok',
dataset.apply_bpe(
f'train{dataset.suffix}.tok', use_vocab=False, executor=cluster_ex2)
dataset.apply_bpe(f'test{dataset.suffix}.functions_standalone.tok',
use_vocab=False, executor=None)
dataset.apply_bpe(f'valid{dataset.suffix}.tok',
dataset.apply_bpe(f'valid{dataset.suffix}.functions_standalone.tok',
use_vocab=False, executor=None)
dataset.get_vocab(size_gb=size_gb)
dataset.binarize_for_XLM(
f'train{dataset.suffix}.[0123456789].bpe', executor=cluster_ex2)
dataset.binarize_for_XLM(
f'train{dataset.suffix}.bpe', executor=cluster_ex2)
dataset.binarize_for_XLM(f'test{dataset.suffix}.bpe', executor=None)
dataset.binarize_for_XLM(f'valid{dataset.suffix}.bpe', executor=None)
......@@ -86,10 +113,16 @@ def preprocess(root, lang1, lang2, keep_comments, local, lang3=None, test_size=1
f'train{dataset.suffix}.[0123456789].functions_standalone.bpe', executor=cluster_ex2)
dataset.binarize_for_XLM(
f'train{dataset.suffix}.functions_standalone.bpe', executor=cluster_ex2)
dataset.binarize_for_XLM(
f'test{dataset.suffix}.functions_*.bpe', executor=None)
dataset.binarize_for_XLM(
f'valid{dataset.suffix}.functions_*.bpe', executor=None)
dataset.binarize_for_XLM(
f'*cpp_sa-cuda_sa*.bpe', executor=None)
langs = [lang1, lang2] if lang3 is None else [lang1, lang2, lang3]
check_files_and_symlink_for_XLM(dataset, langs)
......
......@@ -21,7 +21,7 @@ from preprocessing.src.timeout import timeout, TimeoutError
from sacrebleu import tokenize_v14_international
TOK_NO_SPACE_BEFORE = {',', ';'}
clang.cindex.Config.set_library_path('/usr/lib/llvm-7/lib/')
clang.cindex.Config.set_library_file('/tools/cluster-software/llvm/llvm-7.0.0/lib/libclang.so')
STRINGS_AND_COMMENTS_TOKEN_KINDS = {TokenKind.LITERAL, TokenKind.COMMENT}
logging.basicConfig(
filename='timeout_cpp_tokenizer_examples.log', level=logging.DEBUG)
......@@ -48,6 +48,10 @@ JAVA_CHAR2TOKEN = {"//": ' STOKEN0 ',
CPP_TOKEN2CHAR = JAVA_TOKEN2CHAR.copy()
CPP_CHAR2TOKEN = JAVA_CHAR2TOKEN.copy()
CUDA_TOKEN2CHAR = JAVA_TOKEN2CHAR.copy()
CUDA_CHAR2TOKEN = JAVA_CHAR2TOKEN.copy()
PYTHON_TOKEN2CHAR = {'STOKEN0': '#',
'STOKEN1': "\\n",
'STOKEN2': '"""',
......@@ -329,6 +333,36 @@ def get_cpp_tokens_and_types(s):
tokens.append((tok.spelling, tok.kind))
return tokens
def tokenize_cuda(s, keep_comments=False):
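# CUDA sources reuse the clang-based C++ tokenizer; comments and string literals are
# normalized with the CPP token maps.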
tokens = []
assert isinstance(s, str)
try:
tokens_and_types = get_cpp_tokens_and_types(s)
for tok, typ in tokens_and_types:
if not keep_comments and typ == TokenKind.COMMENT:
continue
if typ in STRINGS_AND_COMMENTS_TOKEN_KINDS:
if typ == TokenKind.COMMENT:
com = process_string(
tok, CPP_CHAR2TOKEN, CPP_TOKEN2CHAR, True)
if len(com) > 0:
tokens.append(com)
else:
tokens.append(process_string(
tok, CPP_CHAR2TOKEN, CPP_TOKEN2CHAR, False))
else:
tokens.append(tok)
return tokens
except KeyboardInterrupt:
raise
except TimeoutError:
print(f'TimeOut Error')
logging.info('*' * 20)
logging.info(f'TimeOut Error for string {s}')
return []
except:
return []
def tokenize_cpp(s, keep_comments=False):
tokens = []
......@@ -384,6 +418,60 @@ def tokenize_java(s, keep_comments=False):
return []
def detokenize_cuda(s):
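# Mirrors detokenize_cpp: re-tokenize with clang, restore brace/comment placeholders
# (CB_/OB_, SPACETOKEN, ENDCOM), then re-indent the lines.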
assert isinstance(s, str) or isinstance(s, list)
if isinstance(s, list):
s = ' '.join(s)
# the ▁ character created bugs in the cpp tokenizer
s = s.replace('ENDCOM', '\n').replace('▁', ' SPACETOKEN ')
try:
tokens_and_types = get_cpp_tokens_and_types(s)
except:
return ''
new_tokens = []
i = 0
while i < len(tokens_and_types):
token, type = tokens_and_types[i]
if type in STRINGS_AND_COMMENTS_TOKEN_KINDS:
new_tokens.append(token.replace('STRNEWLINE', '\n').replace(
'TABSYMBOL', '\t').replace(' ', '').replace('SPACETOKEN', ' '))
if type == TokenKind.COMMENT:
new_tokens.append('NEW_LINE')
elif token == '}':
if i < len(tokens_and_types) - 1 and tokens_and_types[i + 1][0] == ';':
new_tokens += ['CB_COLON', 'NEW_LINE']
i += 2
continue
if i < len(tokens_and_types) - 1 and tokens_and_types[i + 1][0] == ',':
new_tokens += ['CB_COMA', 'NEW_LINE']
i += 2
continue
new_tokens += ['CB_', 'NEW_LINE']
elif token == '{':
new_tokens += ['OB_', 'NEW_LINE']
elif token == '*/':
new_tokens += ['*/', 'NEW_LINE']
elif token == ';':
new_tokens += [';', 'NEW_LINE']
else:
new_tokens.append(token)
if i < len(tokens_and_types) - 1 and tokens_and_types[i + 1][0] in TOK_NO_SPACE_BEFORE:
next_token = tokens_and_types[i + 1][0]
new_tokens[len(new_tokens) - 1] += next_token
if next_token == ';':
new_tokens.append('NEW_LINE')
i += 2
continue
i += 1
lines = re.split('NEW_LINE', ' '.join(new_tokens))
untok_s = indent_lines(lines)
untok_s = untok_s.replace('CB_COLON', '};').replace(
'CB_COMA', '},').replace('CB_', '}').replace('OB_', '{')
return untok_s
def detokenize_cpp(s):
assert isinstance(s, str) or isinstance(s, list)
if isinstance(s, list):
......@@ -555,6 +643,8 @@ def extract_functions_java(s):
except StopIteration:
break
if 'static' in function[0:function.index('{')]:
print(function)
exit()
functions_standalone.append(
remove_java_annotation(' '.join(function)))
else:
......@@ -610,6 +700,93 @@ def clean_hashtags_functions_cpp(function):
function = function.strip()
return function
def extract_functions_cuda(s):
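# Follows extract_functions_cpp: walk the clang token stream and split functions into
# standalone functions vs. class members.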
try:
s = clean_hashtags_functions_cpp(s)
s = s.replace('ENDCOM', '\n').replace('▁', 'SPACETOKEN')
tokens = get_cpp_tokens_and_types(s)
except:
return [], []
i = ind_iter(len(tokens))
functions_standalone = []
functions_class = []
try:
token, token_type = tokens[i.i]
except:
return [], []
while True:
try:
# detect function
if token == ')' and ((tokens[i.i + 1][0] == '{' and tokens[i.i + 2][0] != '}') or (
tokens[i.i + 1][0] == 'throw' and tokens[i.i + 4][0] == '{' and tokens[i.i + 5][0] == '}')):
# go previous until the start of function
while token not in {';', '}', '{'}:
try:
i.prev()
except StopIteration:
break
token = tokens[i.i][0]
i.next()
token, token_type = tokens[i.i]
if token_type == TokenKind.COMMENT:
token = token.strip()
token += " ENDCOM"
function = [token]
token_types = [token_type]
while token != '{':
i.next()
token, token_type = tokens[i.i]
if token_type == TokenKind.COMMENT:
token = token.strip()
token += " ENDCOM"
function.append(token)
token_types.append(token_type)
if token_types[function.index('(') - 1] != TokenKind.IDENTIFIER:
continue
if token == '{':
number_indent = 1
while not (token == '}' and number_indent == 0):
try:
i.next()
token, token_type = tokens[i.i]
if token == '{':
number_indent += 1
elif token == '}':
number_indent -= 1
if token_type == TokenKind.COMMENT:
token = token.strip()
token += " ENDCOM"
function.append(token)
except StopIteration:
break
if 'static' in function[0:function.index('{')] or '::' not in function[0:function.index('(')]:
function = ' '.join(function)
function = re.sub(
"[<][ ][D][O][C][U][M][E][N][T].*?[>] ", "", function)
function = clean_hashtags_functions_cpp(function)
function = function.strip()
function = function.replace(
'\n', 'ENDCOM').replace('SPACETOKEN', '▁')
if not re.sub('[^ ]*[ ][(][ ]\w*([ ][,][ ]\w*)*[ ][)]', "", function[:function.index('{')]).strip().startswith('{') and not function.startswith('#'):
functions_standalone.append(function)
else:
function = ' '.join(function)
function = re.sub(
"[<][ ][D][O][C][U][M][E][N][T].*?[>] ", "", function)
function = clean_hashtags_functions_cpp(function)
function = function.strip()
function = function.replace(
'\n', 'ENDCOM').replace('SPACETOKEN', '▁')
if not re.sub('[^ ]*[ ][(][ ]\w*([ ][,][ ]\w*)*[ ][)]', "", function[:function.index('{')]).strip().startswith('{') and not function.startswith('#'):
functions_class.append(function)
i.next()
token = tokens[i.i][0]
except:
break
return functions_standalone, functions_class
def extract_functions_cpp(s):
try:
......@@ -718,6 +895,26 @@ def extract_functions_cpp_with_docstring(function):
else:
return '', ''
def extract_functions_cuda_with_docstring(function):
function = re.sub("[<][ ][D][O][C][U][M][E][N][T].*?[>] ", "", function)
ds = re.findall('[/][*].*?[*][/][ ]', function, re.DOTALL)
if len(ds) > 0:
for d in ds:
function = function.replace(d, '')
coms = ' '.join([d[:-1] for d in ds])
inline_coms = re.findall('[/][/].*?[E][N][D][C][O][M]', function)
for inline_com in inline_coms:
function = function.replace(inline_com, '')
coms += ' <INLINE> '
coms += inline_com
if len(re.sub(r'\W', '', coms.replace('<INLINE>', '').replace('ENDCOM', ''))) < 5:
return '', ''
else:
return re.sub('\s+', ' ', function), coms
else:
return '', ''
def remove_java_annotation(function):
return re.sub('^(@ (Override|Deprecated|SuppressWarnings) (\( .* \) )?)*', '', function)
......@@ -737,6 +934,9 @@ def get_function_name_java(s):
def get_function_name_cpp(s):
return get_first_token_before_first_parenthesis(s)
def get_function_name_cuda(s):
return get_first_token_before_first_parenthesis(s)
def extract_arguments_java(f):
return extract_arguments_java_using_parentheses(f)
......@@ -745,6 +945,9 @@ def extract_arguments_java(f):
def extract_arguments_cpp(f):
return extract_arguments_java_using_parentheses(f)
def extract_arguments_cuda(f):
return extract_arguments_java_using_parentheses(f)
def extract_arguments_java_using_parentheses(f):
f = f.split(' ')
......
......@@ -52,7 +52,7 @@ class Language:
n_lines = get_nlines(all_tok)
# shuf
shuf_file(all_tok)
# shuf_file(all_tok)
# select test/valid/train and split train in 8
subprocess.run(f"cat {all_tok} | head -n {test_size} > {self.folder.joinpath(f'valid{suffix}.tok')}",
......@@ -60,6 +60,12 @@ class Language:
subprocess.run(f"cat {all_tok} | head -n {2 * test_size} | tail -n {test_size} > {self.folder.joinpath(f'test{suffix}.tok')}", shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
split_len = int((n_lines - 2 * test_size) / 8)
train_len = int((n_lines - 2 * test_size))
subprocess.run(f"cat {all_tok} | tail -n {train_len} > {self.folder.joinpath(f'train{suffix}.tok')}", shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
for n, i in zip(range(8), range(2 * test_size, n_lines, split_len)):
subprocess.run(f"cat {all_tok} | head -n {i + split_len} | tail -n {split_len} > {self.folder.joinpath(f'train{suffix}.{n}.tok')}", shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
......@@ -94,6 +100,7 @@ class Language:
executor = LocalExecutor()
suffix = '.with_comments' if keep_comments else ''
files = list(self.folder.glob(f'train{suffix}.[01234567].tok'))
files.append(self.folder.joinpath(f'train{suffix}.tok'))
files.append(self.folder.joinpath(f'test{suffix}.tok'))
files.append(self.folder.joinpath(f'valid{suffix}.tok'))
toks = [tok for tok in files if not (tok.with_suffix('.functions_standalone.tok').is_file(
......@@ -112,6 +119,9 @@ class Language:
f'train{suffix}.[01234567].functions_class.tok'))
files += list(self.folder.glob(
f'train{suffix}.[01234567].functions_standalone.tok'))
files.append(self.folder.joinpath(f'train{suffix}.functions_class.tok'))
files.append(self.folder.joinpath(
f'train{suffix}.functions_standalone.tok'))
files.append(self.folder.joinpath(f'test{suffix}.functions_class.tok'))
files.append(self.folder.joinpath(
f'test{suffix}.functions_standalone.tok'))
......@@ -178,6 +188,7 @@ class Dataset:
size_gb_ = size_gb / len(self.langs)
nlines = [int(self.sizes[l.l][0] * size_gb_ * 1024 **
3 / self.sizes[l.l][1]) for l in self.langs]
print(nlines)
print(
f"we need to regroup {nlines} lines for {self.langs[0].l} {self.langs[1].l} and {self.langs[2].l} to gather {size_gb} Go")
# train bpe on only 50 GB (25 each lang) of the tokenized train set
......@@ -259,15 +270,31 @@ class Dataset:
job.result()
for split in ['test', 'valid']:
for f_type in ['functions_standalone', 'functions_class']:
for f_type in ['functions_standalone']:
truncate_files(l.folder.joinpath(
f'{split}{self.suffix}.{f_type}.tok') for l in self.langs)
print("apply bpe on train ... ")
self.apply_bpe(
f'train{self.suffix}.[01234567].functions_*.tok', use_vocab=False, executor=bpe_executor)
f'train{self.suffix}.[01234567].functions_standalone.tok', use_vocab=False, executor=bpe_executor)
self.apply_bpe(
f'train{self.suffix}.functions_standalone.tok', use_vocab=False, executor=bpe_executor)
print("apply bpe on test and valid ...")
self.apply_bpe(f'test{self.suffix}.functions_*.tok',
self.apply_bpe(f'test{self.suffix}.functions_standalone.tok',
use_vocab=False, executor=bpe_executor)
self.apply_bpe(f'valid{self.suffix}.functions_standalone.tok',
use_vocab=False, executor=bpe_executor)
print("apply bpe on paralle data...")
self.apply_bpe(f'train.cpp_sa-cuda_sa.cpp_sa.tok',
use_vocab=False, executor=bpe_executor)
self.apply_bpe(f'train.cpp_sa-cuda_sa.cuda_sa.tok',
use_vocab=False, executor=bpe_executor)
self.apply_bpe(f'test.cpp_sa-cuda_sa.cpp_sa.tok',
use_vocab=False, executor=bpe_executor)
self.apply_bpe(f'test.cpp_sa-cuda_sa.cuda_sa.tok',
use_vocab=False, executor=bpe_executor)
self.apply_bpe(f'valid.cpp_sa-cuda_sa.cpp_sa.tok',
use_vocab=False, executor=bpe_executor)
self.apply_bpe(f'valid{self.suffix}.functions_*.tok',
self.apply_bpe(f'valid.cpp_sa-cuda_sa.cuda_sa.tok',
use_vocab=False, executor=bpe_executor)
......@@ -44,7 +44,7 @@ def tokenize_json_helper(inpt):
@timeout(3600)
def output_all_tokenized_results(docs, f_tok):
pool = Pool(cpu_count())
pool = Pool(1)
result_content_tokenized = tqdm.tqdm(pool.imap_unordered(
tokenize_json_helper, docs), total=len(docs))
for content_tokenized, path in result_content_tokenized:
......@@ -52,7 +52,7 @@ def output_all_tokenized_results(docs, f_tok):
continue
else:
content_tokenized = ' '.join(content_tokenized)
s = f"<DOCUMENT_ID=\"{path}\"> {content_tokenized} </DOCUMENT>"
s = f"<DOCUMENT_ID=\"{path}\"> {content_tokenized} </DOCUMENT>"
# for some reason, sometimes some characters of s
# cannot be encoded into utf-8 and it failed to print, so use try/catch
try:
......@@ -96,6 +96,22 @@ def extract_functions_file(input_path, language, test_size=None):
extract_auto_code = getattr(
code_tokenizer, f"extract_functions_{language}")
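# Also derive a parallel-corpus file name (<split>.cpp_sa-cuda_sa.<lang>_sa.tok) next to the
# input, so standalone functions can be written out with a split tag.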
index = str(input_path).rfind('/')
suffix = str(input_path)[index:]
if "train" in suffix:
name = "train"
paral_name = "train.cpp_sa-cuda_sa.{}_sa.tok".format(language)
if "test" in suffix:
name = "test"
paral_name = "test.cpp_sa-cuda_sa.{}_sa.tok".format(language)
if "valid" in suffix:
name = "valid"
paral_name = "valid.cpp_sa-cuda_sa.{}_sa.tok".format(language)
paral_name = str(input_path)[:index+1] + paral_name
print("fuck_paral_name = ", paral_name)
f_paral_sa = open(paral_name, 'w', encoding='utf-8')
with output_path_sa.open('w', encoding='utf-8') as f_sa:
with output_path_class.open('w', encoding='utf-8') as f_class:
pool = Pool(cpu_count())
......@@ -104,11 +120,15 @@ def extract_functions_file(input_path, language, test_size=None):
extract_auto_code, lines), total=len(lines))
for func_standalone, func_class in result_functions:
for func in func_standalone:
f_paral_sa.write(name + "@@@@ | ")
f_paral_sa.write(func)
f_paral_sa.write('\n')
f_sa.write(func)
f_sa.write('\n')
for func in func_class:
f_class.write(func)
f_class.write('\n')
f_paral_sa.close()
def get_nlines(file_path):
......@@ -162,6 +182,7 @@ def shuf_file(file_path):
def apply_bpe_file(file_path, output, codes, vocab=None):
if vocab is None:
vocab = ''
print("comand = " + f"{FAST} applybpe {output} {file_path} {codes} {vocab}")
process = subprocess.run(f"{FAST} applybpe {output} {file_path} {codes} {vocab}",
shell=True,
stdout=subprocess.PIPE,
......@@ -195,6 +216,7 @@ def get_vocab_file(file_path, vocab):
def binarize_for_XLM_file(file_path, vocab):
print("binary_command = " + f"python {XLM_PP} {vocab} {file_path}")
process = subprocess.run(f"python {XLM_PP} {vocab} {file_path}",
shell=True,
stdout=subprocess.PIPE,
......@@ -234,7 +256,9 @@ def regroup_and_select_data(files, output, nlines=None):
def create_symlink(file_path, symlink):
assert file_path.is_file()
# print(file_path)
assert file_path.is_file(), "{0}, error".format(file_path)
#print(symlink)
assert not symlink.is_file()
process = subprocess.run(f"ln -s {file_path} {symlink}",
shell=True,
......
binary_command = python /lustre/S/wenyuanbo/Workspace/github/TransCoder/XLM/preprocess.py /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cpp-cuda-.with_comments/vocab /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cpp-cuda-.with_comments/cpp.valid.with_comments.functions_standalone.bpe
/lustre/S/wenyuanbo/Workspace/github/TransCoder/XLM/tools/fastBPE/fast applybpe /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cpp-cuda-.with_comments/cuda.train.with_comments.6.bpe /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cuda/train.with_comments.6.tok /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cpp-cuda-.with_comments/codes
rm -rf cpp/all.with_comments.tok
rm -rf cpp/test*
rm -rf cpp/train*
rm -rf cpp/valid*
rm -rf cuda/all.with_comments.tok
rm -rf cuda/test*
rm -rf cuda/train*
rm -rf cuda/valid*
rm -rf cpp-cuda-.with_comments/
rm -rf cpp/cpp.000.with_comments.tok
rm -rf cuda/cuda.000.with_comments.tok
rm -rf cpp-cuda-.with_comments.XLM-syml/
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
# SBATCH -J test # The job name
# SBATCH -o ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
# SBATCH -e ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Needed resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu,nv-gpu-hw # Submit to 'nv-gpu' and 'nv-gpu-hw' Partitiion
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=1
#SBATCH --nprocs=8
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
### The system will alloc 8 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
# SBATCH --qos=gpu-short # Request QOS Type
#- Operations
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
module list # list modules loaded by default
which python3
which python
# ##- tools
# module load cluster-tools/v1.0
# module load cmake/3.15.7
# module load git/2.17.1
# module load vim/8.1.2424
# ##- language
# module load python3/3.6.8
# module load llvm/9.0.1
# module load gcc/9.3.0
# ##- cuda
# module load cuda-cudnn/11.0-8.0.4
##- virtualenv
# source xxxxx/activate
# conda init
# conda activate
#- Log information
echo $(module list) # list modules loaded
# echo $(which gcc)
echo $(which python)
echo $(which python3)
# echo $(conda list)
# echo $(pip list)
# cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
echo "Use GPU ${CUDA_VISIBLE_DEVICES}$" # which gpus
#- Warning! Please not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
#- Job step
# python -m preprocessing.preprocess /lustre/S/xushangqing/TransCoder/data/train_dataset/ --lang1 cpp --lang2 cu --keep_comments False --bpe_train_size 0 --test_size 1000 --local True
# export NGPU=1; python XLM/train.py \
# --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 \
# --word_blank '0.1' --n_layers 6 --save_periodic 1 \
# --dump_path '/lustre/S/xushangqing/TransCoder/trained/' \
# --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cu_sa' --fp16 true \
# --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps ''\
# --word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1\
# --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1\
# --generate_hypothesis true --lambda_mt 1 --epoch_size 10000 \
# --data_path '/lustre/S/xushangqing/TransCoder/data/train_dataset/cpp-cu-.XLM-syml/'\
# --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01'\
# --eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cu_sa'\
# --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1'\
# --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0\
# --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false\
# --lgs 'cpp_sa-cu_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1\
# --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
sleep 8h
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/xushangqing/re_TransCoder/trained/pretrain/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 10000 --data_path '/lustre/S/xushangqing/re_TransCoder/data/train_dataset/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/xushangqing/re_TransCoder/trained/pretrain/mlm_cpp_cuda/17158/best-valid_mlm_ppl.pth,/lustre/S/xushangqing/re_TransCoder/trained/pretrain/mlm_cpp_cuda/17158/best-valid_mlm_ppl.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
export WORLD_SIZE=8
# python translate.py --src_lang cpp --tgt_lang python --model_path model/model_1.pth < $1
python translate.py --src_lang cpp --tgt_lang python --model_path pretrained/model_1.pth < data/evaluation/geeks_for_geeks_successful_test_scripts/cpp/BINARY_SEARCH.cpp
# python translate.py --src_lang cpp --tgt_lang python --model_path pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth < data/evaluation/geeks_for_geeks_successful_test_scripts/cpp/BINARY_SEARCH.cpp
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 64000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data/cpp-cuda-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,cuda' --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,6' --lambda_bt 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/' --lambda_mt 1 --epoch_size 100000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-cuda' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_cuda --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 6000 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only false
# jexport NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload false
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload false
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids true --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/tmp/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/mifas4kg4t/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/mifas4kg4t/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload true for pretrain
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/xushangqing/re_TransCoder/trained/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 6000 --has_sentences_ids true --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 3000 --data_path '/lustre/S/xushangqing/re_TransCoder/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/pgpbjiv45a/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/pgpbjiv45a/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only true
export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/xushangqing/re_TransCoder/trained/' --max_len 500 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 6000 --has_sentences_ids true --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 3000 --data_path '/lustre/S/xushangqing/re_TransCoder/data/train_dataset/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
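# Note on warm-starting: XLM's --reload_model expects "<encoder_ckpt>,<decoder_ckpt>",
# which is why the commented runs above pass the same MLM checkpoint twice: it
# initializes both encoder and decoder from pretraining. A minimal sketch (the
# checkpoint path below is illustrative, not one used in this run):
# PRETRAIN=/lustre/S/xushangqing/re_TransCoder/trained/pretrain/mlm_cpp_cuda/<exp_id>
# ... --reload_model "$PRETRAIN/best-valid_mlm_ppl.pth,$PRETRAIN/best-valid_mlm_ppl.pth" ...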
# python XLM/train.py \
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py\
# --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 \
# --word_blank '0.1' --n_layers 4 --save_periodic 1 \
# --dump_path '/lustre/S/xushangqing/re_TransCoder/trained/' \
# --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true \
# --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps ''\
# --word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1\
# --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1\
# --generate_hypothesis true --lambda_mt 1 --epoch_size 1000 \
# --data_path '/lustre/S/xushangqing/re_TransCoder/data/train_dataset/cpp-cuda-.with_comments.XLM-syml/'\
# --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01'\
# --eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa'\
# --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1'\
# --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0\
# --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false\
# --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1\
# --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
\ No newline at end of file
# run for train
# no ae, only mt.
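# (i.e. --ae_steps is omitted and only the supervised --mt_steps 'cpp_sa-cuda_sa'
# objective drives training; see the uncommented command below)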
cd ..
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank 0 --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/model/' --max_len 5120 --bptt 256 --lambda_clm 1 --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 0 --tokens_per_batch 2000 --has_sentences_ids true --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 10000000 --epoch_size 10000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/data/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation false --stopping_criterion 'valid_cpp_sa-cuda_sa_mt_bleu,10' --validation_metrics 'valid_cpp_sa-cuda_sa_mt_bleu' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/model/mlm_cpp_cuda/17569/best-valid_mlm_ppl.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/model/mlm_cpp_cuda/17569/best-valid_mlm_ppl.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --beam_size 1 --lg_sampling_factor '0.1' --eval_only false --exp_name debug_xsq
#export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank 0 --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/model/' --max_len 5120 --bptt 512 --lambda_clm 1 --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 0 --tokens_per_batch 2000 --has_sentences_ids true --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 10000000 --epoch_size 10000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/data/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation false --stopping_criterion 'valid_cpp_sa-cuda_sa_mt_bleu,10' --validation_metrics 'valid_cpp_sa-cuda_sa_mt_bleu' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/model/mlm_cpp_cuda/17569/best-valid_mlm_ppl.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/model/mlm_cpp_cuda/17569/best-valid_mlm_ppl.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --beam_size 1 --lg_sampling_factor '0.1' --eval_only false --exp_name debug_xsq
export NGPU=1; export CUDA_VISIBLE_DEVICES=0; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank 0 --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/xushangqing/re_TransCoder/trained/mt/' --max_len 1024 --bptt 512 --lambda_clm 1 --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 0 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 1000 --epoch_size 1000 --data_path '/lustre/S/xushangqing/re_TransCoder/data/train_dataset/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation false --stopping_criterion 'valid_cpp_sa-cuda_sa_mt_bleu,10' --validation_metrics 'valid_cpp_sa-cuda_sa_mt_bleu' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0' --reload_model '' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --beam_size 1 --lg_sampling_factor '0.1' --eval_only false --exp_name debug_xsq
# /lustre/S/xushangqing/re_TransCoder/trained/pretrain/mlm_cpp_cuda/17699/best-valid_mlm_ppl.pth,/lustre/S/xushangqing/re_TransCoder/trained/pretrain/mlm_cpp_cuda/17699/best-valid_mlm_ppl.pth
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps ''
# --word_shuffle 3 --tokens_per_batch 2000 --has_sentences_ids true --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 10000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/test_for_sep/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01'
# --eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/model/mlm_cpp_cuda/17569/best-valid_mlm_ppl.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/20210526/model/mlm_cpp_cuda/17569/best-valid_mlm_ppl.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
#
#
# run for eval
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 1000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/test_for_sep/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/bt_with_comments_sa_final_modif_test/qiff5v05p7/periodic-36.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/bt_with_comments_sa_final_modif_test/qiff5v05p7/periodic-36.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only true
cd -
pair=cpp-cu
OUTPATH=/lustre/S/xushangqing/TransCoder/data/train_dataset
for lg in $(echo $pair | sed -e 's/\-/ /g'); do
    for split in train valid test; do
        $FASTBPE applybpe $OUTPATH/$pair.$lg.$split data/wiki/para/$pair.$lg.$split $OUTPATH/codes
        python preprocess.py $OUTPATH/vocab $OUTPATH/$pair.$lg.$split
    done
done
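# The loop above assumes fastBPE has been built, that $FASTBPE points at the binary,
# and that $OUTPATH/codes and $OUTPATH/vocab were produced beforehand. A minimal sketch
# of that setup (the training file name is illustrative):
# FASTBPE=XLM/tools/fastBPE/fast
# $FASTBPE learnbpe 50000 $OUTPATH/all.train.tok > $OUTPATH/codes
# $FASTBPE getvocab $OUTPATH/all.train.tok > $OUTPATH/vocab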
\ No newline at end of file
# python -m preprocessing.preprocess /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_ori/ --lang1 cpp --lang2 python --keep_comments True --bpe_train_size 0 --test_size 10 --local True
python -m preprocessing.preprocess /lustre/S/xushangqing/re_TransCoder/data/train_dataset/ --lang1 cpp --lang2 cuda --keep_comments True --bpe_train_size 0 --test_size 10 --local True
# python XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 64000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/data/test_dataset/cpp-python-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,python' --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,10' --lambda_bt 1 --dump_path './temp' --lambda_mt 1 --epoch_size 100000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-python' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_python --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,cuda' --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,6' --lambda_bt 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --lambda_mt 1 --epoch_size 100000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-cuda' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_cuda --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
cd ..
export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/xushangqing/re_TransCoder/data/train_dataset/cpp-cuda-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,cuda' --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,10' --lambda_bt 1 --dump_path '/lustre/S/xushangqing/re_TransCoder/trained/pretrain/' --lambda_mt 1 --epoch_size 500 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-cuda' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_cuda --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false --max_len 1024
cd -
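# In XLM, --stopping_criterion '_valid_mlm_ppl,10' stops training after 10 consecutive
# evaluations without improvement of the validation MLM perplexity (a leading '_'
# marks a metric to be minimized), and --validation_metrics selects the metric used
# to save the best-*.pth checkpoint (e.g. the best-valid_mlm_ppl.pth paths noted above).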
binary_command = python /lustre/S/wenyuanbo/Workspace/github/TransCoder/XLM/preprocess.py /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cpp-cuda-.with_comments/vocab /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cpp-cuda-.with_comments/cpp.valid.with_comments.functions_standalone.bpe
/lustre/S/wenyuanbo/Workspace/github/TransCoder/XLM/tools/fastBPE/fast applybpe /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cpp-cuda-.with_comments/cuda.train.with_comments.6.bpe /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cuda/train.with_comments.6.tok /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000_test/cpp-cuda-.with_comments/codes
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J test # The job name
#SBATCH -o ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Needed resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu,nv-gpu-hw # Submit to 'nv-gpu' and 'nv-gpu-hw' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:4 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
### The system will alloc 8 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
#SBATCH --qos=gpu-normal # Request QOS Type
#- Operations
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
module list # list modules loaded by default
which python3
which python
# ##- tools
# module load cluster-tools/v1.0
# module load cmake/3.15.7
# module load git/2.17.1
# module load vim/8.1.2424
# ##- language
# module load python3/3.6.8
# module load llvm/9.0.1
# module load gcc/9.3.0
# ##- cuda
# module load cuda-cudnn/11.0-8.0.4
##- virtualenv
# source xxxxx/activate
# conda init
# conda activate
#- Log information
echo $(module list) # list modules loaded
# echo $(which gcc)
echo $(which python)
echo $(which python3)
# echo $(conda list)
# echo $(pip list)
# cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
echo "Use GPU ${CUDA_VISIBLE_DEVICES}$" # which gpus
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
#- Job step
# python -m preprocessing.preprocess /lustre/S/xushangqing/TransCoder/data/train_dataset/ --lang1 cpp --lang2 cu --keep_comments False --bpe_train_size 0 --test_size 1000 --local True
# export NGPU=1; python XLM/train.py \
# --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 \
# --word_blank '0.1' --n_layers 6 --save_periodic 1 \
# --dump_path '/lustre/S/xushangqing/TransCoder/trained/' \
# --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cu_sa' --fp16 true \
# --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps ''\
# --word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1\
# --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1\
# --generate_hypothesis true --lambda_mt 1 --epoch_size 10000 \
# --data_path '/lustre/S/xushangqing/TransCoder/data/train_dataset/cpp-cu-.XLM-syml/'\
# --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01'\
# --eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cu_sa'\
# --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1'\
# --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0\
# --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false\
# --lgs 'cpp_sa-cu_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1\
# --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
sleep 8h
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
export WORLD_SIZE=8
# python translate.py --src_lang cpp --tgt_lang python --model_path model/model_1.pth < $1
python translate.py --src_lang cpp --tgt_lang python --model_path pretrained/model_1.pth < data/evaluation/geeks_for_geeks_successful_test_scripts/cpp/BINARY_SEARCH.cpp
# python translate.py --src_lang cpp --tgt_lang python --model_path pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth < data/evaluation/geeks_for_geeks_successful_test_scripts/cpp/BINARY_SEARCH.cpp
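# A minimal sketch for batch-translating every C++ test script (the output directory
# and loop are illustrative additions, not part of the original pipeline):
# mkdir -p translated
# for f in data/evaluation/geeks_for_geeks_successful_test_scripts/cpp/*.cpp; do
#     python translate.py --src_lang cpp --tgt_lang python --model_path pretrained/model_1.pth \
#         < "$f" > "translated/$(basename "$f" .cpp).py"
# done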
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 64000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data/cpp-cuda-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,cuda' --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,6' --lambda_bt 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/' --lambda_mt 1 --epoch_size 100000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-cuda' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_cuda --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 6000 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only false
# jexport NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload false
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload false
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids true --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/tmp/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/mifas4kg4t/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/mifas4kg4t/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload true for pretrain
export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 6000 --has_sentences_ids true --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 3000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/tmp/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/pgpbjiv45a/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/pgpbjiv45a/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only true
# run for train
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 10000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/test_for_sep/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/checkpoint/best-valid_mlm_ppl.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/checkpoint/best-valid_mlm_ppl.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
# run for eval
export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 1000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/test_for_sep/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/bt_with_comments_sa_final_modif_test/qiff5v05p7/periodic-36.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/bt_with_comments_sa_final_modif_test/qiff5v05p7/periodic-36.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only true
pair=cpp-cu
OUTPATH=/lustre/S/xushangqing/TransCoder/data/train_dataset
for lg in $(echo $pair | sed -e 's/\-/ /g'); do
    for split in train valid test; do
        $FASTBPE applybpe $OUTPATH/$pair.$lg.$split data/wiki/para/$pair.$lg.$split $OUTPATH/codes
        python preprocess.py $OUTPATH/vocab $OUTPATH/$pair.$lg.$split
    done
done
\ No newline at end of file
# python -m preprocessing.preprocess /lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_ori/ --lang1 cpp --lang2 python --keep_comments True --bpe_train_size 0 --test_size 10 --local True
python -m preprocessing.preprocess /lustre/S/xushangqing/TransCoder/data/train_dataset/ --lang1 cpp --lang2 cuda --keep_comments True --bpe_train_size 0 --test_size 1000 --local True
# python XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 64000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/data/test_dataset/cpp-python-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,python' --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,10' --lambda_bt 1 --dump_path './temp' --lambda_mt 1 --epoch_size 100000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-python' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_python --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,cuda' --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,6' --lambda_bt 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --lambda_mt 1 --epoch_size 100000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-cuda' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_cuda --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/test_for_sep/cpp-cuda-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,cuda' --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,10' --lambda_bt 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_test_for_sep/' --lambda_mt 1 --epoch_size 10000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-cuda' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_cuda --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py \
--n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 \
--word_blank '0.1' --n_layers 6 --save_periodic 1 \
--dump_path '/lustre/S/xushangqing/TransCoder/trained/' \
--max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cu_sa' --fp16 true \
--share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps ''\
--word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1\
--split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1\
--generate_hypothesis true --lambda_mt 1 --epoch_size 10000 \
--data_path '/lustre/S/xushangqing/TransCoder/data/train_dataset/cpp-cu-.XLM-syml/'\
--gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01'\
--eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cu_sa'\
--reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1'\
--min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0\
--word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false\
--lgs 'cpp_sa-cu_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1\
--lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
\ No newline at end of file
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 64000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data/cpp-cuda-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,cuda' --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,6' --lambda_bt 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/' --lambda_mt 1 --epoch_size 100000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-cuda' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_cuda --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 6000 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only false
# jexport NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload false
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload false
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids true --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/tmp/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/mifas4kg4t/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/mifas4kg4t/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload true for pretrain
export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/test_for_sep/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 6000 --has_sentences_ids true --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 3000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/test_for_sep/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only true
export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py \
--n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 \
--word_blank '0.1' --n_layers 6 --save_periodic 1 \
--dump_path '/lustre/S/xushangqing/TransCoder/trained/' \
--max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cu_sa' --fp16 true \
--share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' \
--word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1 \
--split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 \
--generate_hypothesis true --lambda_mt 1 --epoch_size 10000 \
--data_path '/lustre/S/xushangqing/TransCoder/data/train_dataset/cpp-cu-.XLM-syml/' \
--gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' \
--eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cu_sa' \
--reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' \
--min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 \
--word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false \
--lgs 'cpp_sa-cu_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 \
--lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
\ No newline at end of file
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J test # The job name
#SBATCH -o ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Needed resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu,nv-gpu-hw # Submit to the 'nv-gpu' and 'nv-gpu-hw' partitions
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:4 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --constraint="V100|V100S|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
#SBATCH --ntasks=1
###
### The system will allocate 8 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
#SBATCH --qos=gpu-short # Request QOS Type
#- Operations
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
module list # list modules loaded by default
##- tools
# module load cluster-tools/v1.0
# module load cmake/3.15.7
# module load git/2.17.1
# module load vim/8.1.2424
# ##- language
# module load python3/3.6.8
# ##- cuda
# module load cuda-cudnn/11.0-8.0.4
##- virtualenv
# source xxxxx/activate
#- Log information
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
echo "Use GPU ${CUDA_VISIBLE_DEVICES}$" # which gpus
#- Warning! Please not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo $SLURM_JOB_NAME
echo $SLURM_NNODES
echo $SLURM_JOBID
echo $SLURM_NTASKS
echo $SLURM_TASKS_PER_NODE
echo $SLURM_JOB_ID
echo $SLURM_SUBMIT_DIR
echo $SLURM_NPROCS
echo $SLURM_CPUS_ON_NODE
echo $SLURM_JOB_NODELIST
echo $SLURM_JOB_CPUS_PER_NODE
echo $SLURM_SUBMIT_HOST
echo $SLURM_JOB_NUM_NODES
env
#- Job step
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/xushangqing/re_TransCoder/trained/pretrain/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch -1 --has_sentences_ids true --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 10000 --data_path '/lustre/S/xushangqing/re_TransCoder/data/train_dataset/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation false --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps 'cpp_sa-cuda_sa' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/xushangqing/re_TransCoder/trained/pretrain/mlm_cpp_cuda/17158/best-valid_mlm_ppl.pth,/lustre/S/xushangqing/re_TransCoder/trained/pretrain/mlm_cpp_cuda/17158/best-valid_mlm_ppl.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only false
#pretrain
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/xushangqing/re_TransCoder/data/train_dataset/cpp-cuda-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,cuda' --attention_dropout 0.1 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,10' --lambda_bt 1 --dump_path '/lustre/S/xushangqing/re_TransCoder/trained/pretrain/' --lambda_mt 1 --epoch_size 10000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-cuda' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_cuda --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
sleep 8h
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps '' --max_vocab 64000 --word_mask_keep_rand '0.8,0.1,0.1' --word_blank 0 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data/cpp-cuda-.with_comments.XLM-syml/' --save_periodic 0 --bptt 512 --lambda_clm 1 --ae_steps '' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --word_shuffle 0 --mlm_steps 'cpp,cuda' --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 100000 --stopping_criterion '_valid_mlm_ppl,6' --lambda_bt 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/' --lambda_mt 1 --epoch_size 100000 --early_stopping false --gelu_activation false --n_layers 6 --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0003,weight_decay=0.01' --validation_metrics _valid_mlm_ppl --eval_bleu false --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout 0 --reload_model '' --min_count 0 --lgs 'cpp-cuda' --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 0 --clip_grad_norm 5 --emb_dim 1024 --encoder_only true --beam_size 1 --clm_steps '' --exp_name mlm_cpp_cuda --lambda_ae 1 --lg_sampling_factor '-1' --eval_only false
# export NGPU=8; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 6000 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only false
# jexport NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model/mlm_cpp_cuda/89qzqrcec3/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload false
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids false --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/data_big_10000/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload false
# export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab '-1' --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 60 --has_sentences_ids true --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 30 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/tmp/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 8 --context_size 0 --word_dropout '0.1' --reload_model '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/mifas4kg4t/checkpoint.pth,/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/model_big_10000_test/mlm_cpp_cuda/mifas4kg4t/checkpoint.pth' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '-1' --eval_only true
# reload true for pretrain
export NGPU=1; python -m torch.distributed.launch --nproc_per_node=$NGPU XLM/train.py --n_heads 8 --bt_steps 'cpp_sa-cuda_sa-cpp_sa' --max_vocab 10000 --word_mask_keep_rand '0.8,0.1,0.1' --gen_tpb_multiplier 1 --word_blank '0.1' --n_layers 6 --save_periodic 1 --dump_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/test_for_sep/' --max_len 5120 --bptt 256 --lambda_clm 1 --ae_steps 'cpp_sa,cuda_sa' --fp16 true --share_inout_emb true --lambda_mlm 1 --sinusoidal_embeddings false --mlm_steps '' --word_shuffle 3 --tokens_per_batch 6000 --has_sentences_ids true --attention_dropout 0 --split_data false --length_penalty 1 --max_epoch 10000000 --stopping_criterion '' --lambda_bt 1 --generate_hypothesis true --lambda_mt 1 --epoch_size 3000 --data_path '/lustre/S/wenyuanbo/Workspace/github/TransCoder/pldi/test_for_sep/cpp-cuda-.with_comments.XLM-syml/' --gelu_activation false --split_data_accross_gpu global --optimizer 'adam_inverse_sqrt,warmup_updates=10000,lr=0.0001,weight_decay=0.01' --eval_computation true --validation_metrics '' --eval_bleu true --dropout '0.1' --mt_steps '' --reload_emb '' --batch_size 32 --context_size 0 --word_dropout '0.1' --reload_model '' --min_count 0 --eval_bleu_test_only false --group_by_size true --early_stopping false --sample_alpha 0 --word_pred '0.15' --amp 2 --max_batch_size 128 --clip_grad_norm 5 --emb_dim 1024 --encoder_only false --lgs 'cpp_sa-cuda_sa' --clm_steps '' --exp_name bt_with_comments_sa_final_modif_test --beam_size 1 --lambda_ae '0:1,100000:0.1,300000:0' --lg_sampling_factor '0.1' --eval_only true
import pandas as pd
import re
from tqdm import tqdm
def fetch_func(par, ind, cont): # given the list of brace offsets in the file and the offset of a function's parameter list, return the start/end offsets of the function body; the logic is to find the first matched pair of braces after the parameter parentheses
t=0
for i in range(len(par)):
if par[i]>ind:
t=i
break
t0=t
lc=1
rc=0
while lc>rc:
t+=1
try:
if cont[par[t]]=='{':
lc+=1
else:
rc+=1
except Exception:
# print('exception', ind, par[t0], cont[ind-20:ind+20])
return 0, 0
return par[t0], par[t]
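# Minimal usage sketch (hypothetical names, not executed by this script): fetch_func is
# driven with the offsets of every brace in a source string, e.g.
#   snippet = "void foo(int a){ if(a){ a++; } }"
#   braces = [m.start() for m in re.finditer(r'\{|\}', snippet)]
#   start, end = fetch_func(braces, snippet.index('('), snippet)
#   snippet[start:end+1]  # -> "{ if(a){ a++; } }"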
if __name__ == '__main__':
tet='''
__global__ void foo(int* a,
int *b){
int a;
if(condition){
int b;
}
for(int c; c<10; c++){
foo();
}
}
__global__ void reportThreadsKernel(int* threadIdsX, int* threadIdsY,
int* threadIdsZ, int* blockIdsX,
int* blockIdsY, int* blockIdsZ) {
int tid = (threadXStride * threadIdx.x) + (threadYStride * threadIdx.y) +
(threadZStride * threadIdx.z) + (blockXStride * blockIdx.x) +
(blockYStride * blockIdx.y) + (blockZStride * blockIdx.z);
} '''
    pat=re.compile(r'(\b[A-Za-z0-9_\ ]+\s+\b[A-Za-z0-9_]+\s*\(.*?\))(?=\s*\{)', re.DOTALL) # match a function signature (return type, name and parameter parentheses) up to, but not including, the opening brace
    pat_par=re.compile(r'\{|\}') # match curly braces, used by fetch_func
pat_sent=re.compile(r'\;')
# print([re.sub(r'\s+', ' ', i) for i in pat.findall(tet)])
# exit()
# df=pd.read_csv('test.csv')
# print(df.columns)
cnt=0
err=0
stop_set=set(['main', 'printf', 'operator'])
cnt1=0
    sizes=[2648, 66086, 183558, 153765] # hard-coded row counts of the four input files, only used to size the tqdm progress bar
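    # A possible alternative (sketch only, not used here): compute the row counts on the fly
    # instead of hard-coding them, at the cost of reading each file twice, e.g.
    #   sizes = [len(pd.read_csv('consis_cuda_cpp_00000000000%d.gz' % i,
    #                            compression='gzip', usecols=[0])) for i in range(4)]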
out_data={'repo_name':[], 'cpp':[], 'cpp_title':[],'cpp_path':[], 'cuda':[], 'cuda_title':[], 'cuda_path': []}
for i in range(4):
df=pd.read_csv('consis_cuda_cpp_00000000000%d.gz'%i, compression='gzip')
# df=pd.read_csv('test.csv')
with tqdm(total=sizes[i]) as bar:
# with tqdm(total=50) as bar:
for ind, ser in df.iterrows():
cont, cont_1=ser.get('content'), ser.get('content_1')
bar.update(1)
bar.set_description('{} {}'.format(cnt, err))
                if not (isinstance(cont, str) and isinstance(cont_1, str)): # because of indentation issues and missing metadata entries, the csv cell may come back empty (NaN)
err+=1
continue
                fs=pat.finditer(cont) # cpp functions
                gs=pat.finditer(cont_1) # cuda functions; finditer is used so the start offset of each match is kept
fp=[i.start() for i in pat_par.finditer(cont)]
gp=[i.start() for i in pat_par.finditer(cont_1)]
                # below, g-prefixed variables refer to cuda and f-prefixed variables to cpp
gfs=[]
gcs=[]
gts=[]
# ct=0
for g in gs:
t0, t=fetch_func(gp, g.start(), cont_1)
                    conti=cont_1[t0:t+1] # conti = the body of this cuda function
if conti.find('block')>0 and conti.find('thread')>0 and conti.find('#')<0:
                        gfs.append(re.sub(r'\s+', ' ', g.group()).split('(')[0].split(' ')[-1]) # gfs: cuda function names (whitespace collapsed to avoid false mismatches)
                        gcs.append((t0, t+1)) # gcs: (start, end) offsets of each cuda function body
li=re.sub(r'\s+', ' ', g.group()).split('{')[0]
                        gts.append(''.join([li.split('(')[0].split(' ')[-1], '(', li.split('(')[1]])) # gts: function name plus parameter list (the logic here is somewhat redundant)
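                # Hypothetical illustration of the lists built above: for a cuda kernel
                # "__global__ void foo(int* a, int *b){ ... blockIdx ... threadIdx ... }"
                # this loop records gfs=['foo'], gts=['foo(int* a, int *b)'] and
                # gcs=[(body_start, body_end)].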
pairs=[]
ffs=[]
for f in fs:
fi=re.sub(r'\s+', ' ', f.group()).split('(')[0].split(' ')[-1]
li=re.sub(r'\s+', ' ', f.group()).split('{')[0]
ft=''.join([li.split('(')[0].split(' ')[-1], '(', li.split('(')[1]])
# print(fi, fi in gfs)
                    if fi in gfs and fi not in stop_set and len(fi)>5 and (fi not in ffs): # pair functions up by the cpp function name; this matching logic could still be refined
t0, t=fetch_func(fp, f.start(), cont)
if t-t0<=10 or cont[t0:t+1].find('#')>0 or t-t0-cont[t0:t+1].find(';')<10:
continue
out_data['cpp'].append(cont[t0:t+1])
out_data['cpp_title'].append(ft)
pairi=gcs[gfs.index(fi)]
out_data['cuda'].append(cont_1[pairi[0]:pairi[1]])
out_data['cuda_title'].append(gts[gfs.index(fi)])
out_data['cpp_path'].append(ser.get('path'))
out_data['cuda_path'].append(ser.get('path_1'))
out_data['repo_name'].append(ser.get('f0_'))
ffs.append(fi)
# cnt+=len(cfs)
# break
cnt=len(out_data['cpp'])
continue
                # the logic below was only used for the initial counting pass (dead code after the continue above)
try:
fs=[re.sub(r'\s+', ' ', i).split('(')[0].split(' ')[-1] for i in\
pat.findall(cont)]
gs=[re.sub(r'\s+', ' ', i).split('(')[0].split(' ')[-1] for i in\
pat.findall(cont_1)]
except Exception:
print(cont, cont_1)
continue
fs=[i for i in fs if len(i)>0]
gs=[i for i in gs if len(i)>0]
cnt+=len(set(fs).intersection(set(gs)).difference(stop_set))
# print(fs, gs, set(fs).intersection(set(gs)).difference(stop_set))
# cnt1+=1
# if cnt1>5:
# break
print(cnt, err)
dfi=pd.DataFrame(out_data)
dfi.to_csv('out_with_title.csv')
\ No newline at end of file
import gzip
import json
with gzip.open('../data/train_dataset/cuda/cuda.000.json.gz', 'rb') as f:
cnt=0
for line in f.read().decode().split('\n'):
i=json.loads(line)
if i['content'].split('(')[0]=='BilinearSamplingForward':
            cnt+=1
print(cnt)
with gzip.open('../data/train_dataset/cpp/cpp.000.json.gz', 'rb') as f:
for line in f.read().decode().split('\n'):
i=json.loads(line)
if i['content'].split('(')[0]=='BilinearSamplingForward':
            cnt+=1
print(cnt)
with gzip.open('../data/train_dataset/cuda/cuda.000.json.gz', 'rb') as f:
cnt=0
for line in f.read().decode().split('\n'):
i=json.loads(line)
print(i['content'].split('(')[0], "###")
\ No newline at end of file
......@@ -15,6 +15,7 @@
#
import argparse
from logging import Logger, getLogger
import os
import sys
......@@ -82,22 +83,30 @@ class Translator:
self.reloaded_params['reload_model'] = ','.join([params.model_path] * 2)
encoder, decoder = build_model(self.reloaded_params, self.dico)
        getLogger().info("built encoder and decoder")
self.encoder = encoder[0]
        getLogger().info("reloading encoder weights")
self.encoder.load_state_dict(reloaded['encoder'])
        getLogger().info("encoder weights loaded")
assert len(reloaded['encoder'].keys()) == len(
list(p for p, _ in self.encoder.state_dict().items()))
        getLogger().info("encoder parameters verified")
self.decoder = decoder[0]
self.decoder.load_state_dict(reloaded['decoder'])
assert len(reloaded['decoder'].keys()) == len(
list(p for p, _ in self.decoder.state_dict().items()))
        getLogger().info("decoder weights loaded and verified")
self.encoder.cuda()
self.decoder.cuda()
self.encoder.eval()
self.decoder.eval()
        getLogger().info("models moved to GPU and set to eval mode")
self.bpe_model = fastBPE.fastBPE(os.path.abspath(params.BPE_path))
        getLogger().info("BPE model loaded")
def translate(self, input, lang1, lang2, n=1, beam_size=1, sample_temperature=None, device='cuda:0'):
with torch.no_grad():
......