In [55]:
!pip install -q -U bitsandbytes 
!pip install -q -U transformers 
!pip install -q -U peft accelerate datasets scipy einops evaluate
!pip install -q -U  trl rouge_score



In [56]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [57]:
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    GenerationConfig
)
from tqdm import tqdm
from trl import SFTTrainer
import torch
import time
import pandas as pd
import numpy as np
from huggingface_hub import interpreter_login

interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|



Enter your token (input will not be visible):  ········
Add token as git credential? (Y/n)  n


In [58]:
import os
# disable Weights and Biases
os.environ['WANDB_DISABLED']="true"

In [59]:
huggingface_dataset_name = "neil-code/dialogsum-test"
dataset = load_dataset(huggingface_dataset_name)

In [60]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1999
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 499
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 499
    })
})


In [61]:

compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=False,
    )

In [62]:

model_name='microsoft/phi-2'
device_map = {"": 0}
original_model = AutoModelForCausalLM.from_pretrained(model_name, 
                                                      device_map=device_map,
                                                      quantization_config=bnb_config,
                                                      trust_remote_code=True,
                                                      use_auth_token=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [63]:
tokenizer = AutoTokenizer.from_pretrained(model_name,trust_remote_code=True,padding_side="left",add_eos_token=True,add_bos_token=True,use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

In [64]:
def gen(model, prompt, max_new_tokens=100, tokenizer=None, device='cuda' if torch.cuda.is_available() else 'cpu'):
    """
    use specified model and tokenizer to generate text.

    parameter：
        model: a transformers model that has loaded （by AutoModelForCausalLM）
        prompt: the input  prompt
        max_new_tokens: max tokens that generate
        tokenizer: tokenizer you use
        device: device（by default: 'cuda' , if can't  use gpu,  'cpu' instead）

    return：
        text that generated（mostly length by 1）

    """
    if tokenizer is None:
        raise ValueError("tokenizer cannot be None.")

    model.to(device)
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


In [65]:
%%time
from transformers import set_seed
seed = 42
set_seed(seed)

index = 10

prompt = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

formatted_prompt = f"Instruct: Summarize the following conversation.\n{prompt}\nOutput:\n"
res = gen(original_model,formatted_prompt,100, tokenizer, )
#print(res[0])
output = res[0].split('Output:\n')[1]

dash_line = '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{formatted_prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Instruct: Summarize the following conversation.
#Person1#: Happy Birthday, this is for you, Brian.
#Person2#: I'm so happy you remember, please come in and enjoy the party. Everyone's here, I'm sure you have a good time.
#Person1#: Brian, may I have a pleasure to have a dance with you?
#Person2#: Ok.
#Person1#: This is really wonderful party.
#Person2#: Yes, you are always popular with everyone. and you look very pretty today.
#Person1#: Thanks, that's very kind of you to say. I hope my necklace goes with my dress, and they both make me look good I feel.
#Person2#: You look great, you are absolutely glowing.
#Person1#: Thanks, this is a fine party. We should have a drink together to celebrate your birthday
Output:

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# attends Brian's birthday pa

In [66]:
torch.cuda.is_available()
def create_prompt_formats(sample):
    """
    Format various fields of the sample ('instruction','output')
    Then concatenate them using two newline characters 
    :param sample: Sample dictionnary
    """
    INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    INSTRUCTION_KEY = "### Instruct: Summarize the below conversation."
    RESPONSE_KEY = "### Output:"
    END_KEY = "### End"
    
    blurb = f"\n{INTRO_BLURB}"
    instruction = f"{INSTRUCTION_KEY}"
    input_context = f"{sample['dialogue']}" if sample["dialogue"] else None
    response = f"{RESPONSE_KEY}\n{sample['summary']}"
    end = f"{END_KEY}"
    
    parts = [part for part in [blurb, instruction, input_context, response, end] if part]

    formatted_prompt = "\n\n".join(parts)
    sample["text"] = formatted_prompt

    return sample

In [67]:
from functools import partial

# SOURCE https://github.com/databrickslabs/dolly/blob/master/training/trainer.py
def get_max_length(model):
    conf = model.config
    max_length = None
    for length_setting in ["n_positions", "max_position_embeddings", "seq_length"]:
        max_length = getattr(model.config, length_setting, None)
        if max_length:
            print(f"Found max length: {max_length}")
            break
    if not max_length:
        max_length = 1024
        print(f"Using default max length: {max_length}")
    return max_length


def preprocess_batch(batch, tokenizer, max_length):
    """
    Tokenizing a batch
    """
    return tokenizer(
        batch["text"],
        max_length=max_length,
        truncation=True,
    )

# SOURCE https://github.com/databrickslabs/dolly/blob/master/training/trainer.py
def preprocess_dataset(tokenizer: AutoTokenizer, max_length: int,seed, dataset):
    """Format & tokenize it so it is ready for training
    :param tokenizer (AutoTokenizer): Model Tokenizer
    :param max_length (int): Maximum number of tokens to emit from tokenizer
    """

    # Add prompt to each sample
    print("Preprocessing dataset...")
    dataset = dataset.map(create_prompt_formats)#, batched=True)

    # Apply preprocessing to each batch of the dataset & and remove 'instruction', 'context', 'response', 'category' fields
    _preprocessing_function = partial(preprocess_batch, max_length=max_length, tokenizer=tokenizer)
    dataset = dataset.map(
        _preprocessing_function,
        batched=True,
        remove_columns=['id', 'topic', 'dialogue', 'summary'],
    )

    # Filter out samples that have input_ids exceeding max_length
    dataset = dataset.filter(lambda sample: len(sample["input_ids"]) < max_length)

    # Shuffle dataset
    dataset = dataset.shuffle(seed=seed)

    return dataset


In [68]:
## Pre-process dataset
max_length = get_max_length(original_model)
print(max_length)

train_dataset = preprocess_dataset(tokenizer, max_length,seed, dataset['train'])
eval_dataset = preprocess_dataset(tokenizer, max_length,seed, dataset['validation'])

Found max length: 2048
2048
Preprocessing dataset...


Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1999 [00:00<?, ? examples/s]

Preprocessing dataset...


Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Filter:   0%|          | 0/499 [00:00<?, ? examples/s]

In [69]:
print(train_dataset)
print(train_dataset['text'][0])
print(train_dataset['input_ids'][0])
print(train_dataset['attention_mask'][0])

Dataset({
    features: ['text', 'input_ids', 'attention_mask'],
    num_rows: 1999
})

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruct: Summarize the below conversation.

#Person1#: How are your French lessons going?
#Person2#: Well, I'm no longer taking French lessons.
#Person1#: Are you kidding? You told me you made up your mind to study French well this summer. Didn't you sign up for the four-week course?
#Person2#: I did. But the teacher told me not to come back any more after only one week and he returned my money for the remaining three weeks.
#Person1#: How come? I've never heard of a case like that before. Did you have a quarrel with your teacher?
#Person2#: Of course not. At first everything went well and he was satisfied with me. But he got angry after I broke the class rules several times.
#Person1#: It was your fault, I think. You'd gone too far.
#Person2#: Perhaps. But I don't understand why he told m

In [70]:
from peft import prepare_model_for_kbit_training
original_model = prepare_model_for_kbit_training(original_model)

In [71]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

config = LoraConfig(
    r=32, #Rank
    lora_alpha=32,
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'dense'
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

# 1 - Enabling gradient checkpointing to reduce memory usage during fine-tuning
original_model.gradient_checkpointing_enable()

peft_model = get_peft_model(original_model, config)


In [72]:
def print_number_of_trainable_model_parameters(model):
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Trainable parameters: {trainable_params}")
    print(f"All parameters: {total_params}")
    print(f"Percentage of trainable parameters: {100 * trainable_params / total_params:.2f}%")


In [73]:
print(print_number_of_trainable_model_parameters(peft_model))

Trainable parameters: 20971520
All parameters: 1542364160
Percentage of trainable parameters: 1.36%
None


In [74]:

output_dir = f'./peft-dialogue-summary-training-{str(int(time.time()))}'
import transformers

peft_training_args = TrainingArguments(
    output_dir = output_dir,
    warmup_steps=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    max_steps=1000,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    logging_steps=25,
    logging_dir="./logs",
    save_strategy="steps",
    save_steps=25,
    eval_strategy="steps",
    eval_steps=25,
    do_eval=True,
    gradient_checkpointing=True,
    report_to="none",
    overwrite_output_dir = True,
    group_by_length=True,
)

peft_model.config.use_cache = False

peft_trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=peft_training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [75]:
import transformers
print(transformers.__version__)


4.52.4


In [76]:
peft_trainer.train()

Step,Training Loss,Validation Loss
25,1.6623,1.361354
50,1.1911,1.335325
75,1.4449,1.314655
100,1.2057,1.318654
125,1.4366,1.304205
150,1.1362,1.318154
175,1.4025,1.302562
200,1.1475,1.302252
225,1.4437,1.295985
250,1.2241,1.297235


TrainOutput(global_step=1000, training_loss=1.2922991542816162, metrics={'train_runtime': 9817.4201, 'train_samples_per_second': 0.407, 'train_steps_per_second': 0.102, 'total_flos': 1.848044708960256e+16, 'train_loss': 1.2922991542816162, 'epoch': 2.0})

In [77]:
!ls

peft-dialogue-summary-training-1748683619


In [81]:
!zip -r checkpoint-1000.zip peft-dialogue-summary-training-1748683619/checkpoint-1000/

  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/ (stored 0%)
  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/vocab.json (deflated 68%)
  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/training_args.bin (deflated 51%)
  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/optimizer.pt (deflated 11%)
  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/trainer_state.json (deflated 83%)
  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/merges.txt (deflated 53%)
  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/README.md (deflated 66%)
  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/adapter_model.safetensors (deflated 8%)
  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/added_tokens.json (deflated 84%)
  adding: peft-dialogue-summary-training-1748683619/checkpoint-1000/rng_state.pth (deflated 25%)
  adding: peft-dialogue-summary-t

In [83]:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

base_model_id = "microsoft/phi-2"
base_model = AutoModelForCausalLM.from_pretrained(base_model_id,
                                                      device_map='auto',
                                                      quantization_config=bnb_config,
                                                      trust_remote_code=True,
                                                      use_auth_token=True)




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [84]:
eval_tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True, use_fast=False)
eval_tokenizer.pad_token = eval_tokenizer.eos_token


In [87]:
from peft import PeftModel

ft_model = PeftModel.from_pretrained(base_model,
                                     "/kaggle/working/peft-dialogue-summary-training-1748683619/checkpoint-1000", # PEFT adapter path
                                     torch_dtype=torch.float16, # model precision
                                     is_trainable=False # only for inference.
                                     )




In [89]:
%%time
from transformers import set_seed
set_seed(seed)

index = 5
dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

prompt = f"Instruct: Summarize the following conversation.\n{dialogue}\nOutput:\n"

peft_model_res = gen(ft_model,prompt,100,tokenizer,)
peft_model_output = peft_model_res[0].split('Output:\n')[1]
#print(peft_model_output)
prefix, success, result = peft_model_output.partition('###')

dash_line = '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'PEFT MODEL:\n{prefix}')


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Instruct: Summarize the following conversation.
#Person1#: You're finally here! What took so long?
#Person2#: I got stuck in traffic again. There was a terrible traffic jam near the Carrefour intersection.
#Person1#: It's always rather congested down there during rush hour. Maybe you should try to find a different route to get home.
#Person2#: I don't think it can be avoided, to be honest.
#Person1#: perhaps it would be better if you started taking public transport system to work.
#Person2#: I think it's something that I'll have to consider. The public transport system is pretty good.
#Person1#: It would be better for the environment, too.
#Person2#: I know. I feel bad about how much my car is adding to the pollution problem in this city.
#Person1#: Taking the subway would be a lot less stressful than driving as well.
#Person2#: The only problem is that I'm going to really 

In [90]:

original_model = AutoModelForCausalLM.from_pretrained(base_model_id,
                                                      device_map='auto',
                                                      quantization_config=bnb_config,
                                                      trust_remote_code=True,
                                                      use_auth_token=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [92]:
import pandas as pd

dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary'] # from dataset\['test'\] fetch 10 dialogue and summary, for generate and compare.

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

# generate summary for each dialogue.
for idx, dialogue in enumerate(dialogues):
    human_baseline_text_output = human_baseline_summaries[idx]
    # create prompt
    prompt = f"Instruct: Summarize the following conversation.\n{dialogue}\nOutput:\n"
    # original model's result
    original_model_res = gen(original_model,prompt,100,tokenizer,)
    original_model_text_output = original_model_res[0].split('Output:\n')[1]

    # peft_model's result
    peft_model_res = gen(ft_model,prompt,100,tokenizer,)
    peft_model_output = peft_model_res[0].split('Output:\n')[1]
    print(peft_model_output)
    peft_model_text_output, success, result = peft_model_output.partition('###')

    original_model_summaries.append(original_model_text_output)
    peft_model_summaries.append(peft_model_text_output)

# create a dataframe for compare.
zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'peft_model_summaries'])
df


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person1# asks Ms. Dawson to take a dictation for #Person1#. #Person1# tells Ms. Dawson that all office communications are restricted to email correspondence and official memos. #Person1# also tells Ms. Dawson that any employee who persists in using Instant Messaging will face termination.

### End of Output

### Output End

### Input End

### Output Start
#Person1#: Ms. Dawson, I need you to take a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person1# asks Ms. Dawson to take a dictation for #Person1#. #Person1# tells Ms. Dawson that all office communications are restricted to email correspondence and official memos. #Person1# also tells Ms. Dawson that any employee who persists in using Instant Messaging will face termination.

### End of Output

### Output End

### Input End

### Output Start
#Person1#: Ms. Dawson, I need you to take a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person1# asks Ms. Dawson to take a dictation for #Person1#. #Person1# tells Ms. Dawson that all office communications are restricted to email correspondence and official memos. #Person1# also tells Ms. Dawson that any employee who persists in using Instant Messaging will face termination.

### End of Output

### Output End

### Input End

### Output Start
#Person1#: Ms. Dawson, I need you to take a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person2# got stuck in traffic again and #Person1# suggests #Person2# should consider taking public transport system to work. #Person2# agrees and will quit driving to work.

### End of Output ###

### Start of Output 2 ###
#Person1#: I'm so excited to see you again!
#Person2#: I'm excited too!
#Person1#: I've been thinking about you a lot lately.
#Person


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person2# got stuck in traffic again and #Person1# suggests #Person2# should consider taking public transport system to work. #Person2# agrees and will quit driving to work.

### End of Output ###

### Start of Output 2 ###
#Person1#: I'm so excited to see you again!
#Person2#: I'm excited too!
#Person1#: I've been thinking about you a lot lately.
#Person


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person2# got stuck in traffic again and #Person1# suggests #Person2# should consider taking public transport system to work. #Person2# agrees and will quit driving to work.

### End of Output ###

### Start of Output 2 ###
#Person1#: I'm so excited to see you again!
#Person2#: I'm excited too!
#Person1#: I've been thinking about you a lot lately.
#Person


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Masha and Hero are getting divorced. Masha tells Kate that they are having a separation for 2 months and filed for divorce. Kate is surprised.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Masha and Hero are getting divorced. Masha tells Kate that they are having a separation for 2 months and filed for divorce. Kate is surprised.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Masha and Hero are getting divorced. Masha tells Kate that they are having a separation for 2 months and filed for divorce. Kate is surprised.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Brian's birthday party is going well. #Person1# compliments Brian and they have a dance together.

#EndOfOutput#

### 


Unnamed: 0,human_baseline_summaries,original_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,"Person 1: Ms. Dawson, I need you to take a dic...",#Person1# asks Ms. Dawson to take a dictation ...
1,In order to prevent employees from wasting tim...,"Person 1: Ms. Dawson, I need you to take a dic...",#Person1# asks Ms. Dawson to take a dictation ...
2,Ms. Dawson takes a dictation for #Person1# abo...,"Person 1: Ms. Dawson, I need you to take a dic...",#Person1# asks Ms. Dawson to take a dictation ...
3,#Person2# arrives late because of traffic jam....,Person1 and Person2 are discussing the traffic...,#Person2# got stuck in traffic again and #Pers...
4,#Person2# decides to follow #Person1#'s sugges...,Person1 and Person2 are discussing the traffic...,#Person2# got stuck in traffic again and #Pers...
5,#Person2# complains to #Person1# about the tra...,Person1 and Person2 are discussing the traffic...,#Person2# got stuck in traffic again and #Pers...
6,#Person1# tells Kate that Masha and Hero get d...,Kate informed that Masha and Hero are getting ...,Masha and Hero are getting divorced. Masha tel...
7,#Person1# tells Kate that Masha and Hero are g...,Kate informed that Masha and Hero are getting ...,Masha and Hero are getting divorced. Masha tel...
8,#Person1# and Kate talk about the divorce betw...,Kate informed that Masha and Hero are getting ...,Masha and Hero are getting divorced. Masha tel...
9,#Person1# and Brian are at the birthday party ...,"Person1 and Person2 are at a party, and Person...",Brian's birthday party is going well. #Person1...


In [94]:
import pandas as pd

dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary'] # from dataset\['test'\] fetch 10 dialogue and summary, for generate and compare.

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

# generate summary for each dialogue.
for idx, dialogue in enumerate(dialogues):
    human_baseline_text_output = human_baseline_summaries[idx]
    # create prompt
    prompt = f"Instruct: Summarize the following conversation.\n{dialogue}\nOutput:\n"
    # original model's result
    original_model_res = gen(original_model,prompt,100,tokenizer,)
    original_model_text_output = original_model_res[0].split('Output:\n')[1]

    # peft_model's result
    peft_model_res = gen(ft_model,prompt,100,tokenizer,)
    peft_model_output = peft_model_res[0].split('Output:\n')[1]
    print(peft_model_output)
    peft_model_text_output, success, result = peft_model_output.partition('###')

    original_model_summaries.append(original_model_text_output)
    peft_model_summaries.append(peft_model_text_output)

# create a dataframe for compare.
zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'peft_model_summaries'])
df

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person1# asks Ms. Dawson to take a dictation for #Person1#. #Person1# tells Ms. Dawson that all office communications are restricted to email correspondence and official memos. #Person1# also tells Ms. Dawson that any employee who persists in using Instant Messaging will face termination.

### End of Output

### Output End

### Input End

### Output Start
#Person1#: Ms. Dawson, I need you to take a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person1# asks Ms. Dawson to take a dictation for #Person1#. #Person1# tells Ms. Dawson that all office communications are restricted to email correspondence and official memos. #Person1# also tells Ms. Dawson that any employee who persists in using Instant Messaging will face termination.

### End of Output

### Output End

### Input End

### Output Start
#Person1#: Ms. Dawson, I need you to take a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person1# asks Ms. Dawson to take a dictation for #Person1#. #Person1# tells Ms. Dawson that all office communications are restricted to email correspondence and official memos. #Person1# also tells Ms. Dawson that any employee who persists in using Instant Messaging will face termination.

### End of Output

### Output End

### Input End

### Output Start
#Person1#: Ms. Dawson, I need you to take a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person2# got stuck in traffic again and #Person1# suggests #Person2# should consider taking public transport system to work. #Person2# agrees and will quit driving to work.

### End of Output ###

### Start of Output 2 ###
#Person1#: I'm so excited to see you again!
#Person2#: I'm excited too!
#Person1#: I've been thinking about you a lot lately.
#Person


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person2# got stuck in traffic again and #Person1# suggests #Person2# should consider taking public transport system to work. #Person2# agrees and will quit driving to work.

### End of Output ###

### Start of Output 2 ###
#Person1#: I'm so excited to see you again!
#Person2#: I'm excited too!
#Person1#: I've been thinking about you a lot lately.
#Person


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


#Person2# got stuck in traffic again and #Person1# suggests #Person2# should consider taking public transport system to work. #Person2# agrees and will quit driving to work.

### End of Output ###

### Start of Output 2 ###
#Person1#: I'm so excited to see you again!
#Person2#: I'm excited too!
#Person1#: I've been thinking about you a lot lately.
#Person


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Masha and Hero are getting divorced. Masha tells Kate that they are having a separation for 2 months and filed for divorce. Kate is surprised.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Masha and Hero are getting divorced. Masha tells Kate that they are having a separation for 2 months and filed for divorce. Kate is surprised.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Masha and Hero are getting divorced. Masha tells Kate that they are having a separation for 2 months and filed for divorce. Kate is surprised.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Brian's birthday party is going well. #Person1# compliments Brian and they have a dance together.

#EndOfOutput#

### 


Unnamed: 0,human_baseline_summaries,original_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,"Person 1: Ms. Dawson, I need you to take a dic...",#Person1# asks Ms. Dawson to take a dictation ...
1,In order to prevent employees from wasting tim...,"Person 1: Ms. Dawson, I need you to take a dic...",#Person1# asks Ms. Dawson to take a dictation ...
2,Ms. Dawson takes a dictation for #Person1# abo...,"Person 1: Ms. Dawson, I need you to take a dic...",#Person1# asks Ms. Dawson to take a dictation ...
3,#Person2# arrives late because of traffic jam....,Person1 and Person2 are discussing the traffic...,#Person2# got stuck in traffic again and #Pers...
4,#Person2# decides to follow #Person1#'s sugges...,Person1 and Person2 are discussing the traffic...,#Person2# got stuck in traffic again and #Pers...
5,#Person2# complains to #Person1# about the tra...,Person1 and Person2 are discussing the traffic...,#Person2# got stuck in traffic again and #Pers...
6,#Person1# tells Kate that Masha and Hero get d...,Kate informed that Masha and Hero are getting ...,Masha and Hero are getting divorced. Masha tel...
7,#Person1# tells Kate that Masha and Hero are g...,Kate informed that Masha and Hero are getting ...,Masha and Hero are getting divorced. Masha tel...
8,#Person1# and Kate talk about the divorce betw...,Kate informed that Masha and Hero are getting ...,Masha and Hero are getting divorced. Masha tel...
9,#Person1# and Brian are at the birthday party ...,"Person1 and Person2 are at a party, and Person...",Brian's birthday party is going well. #Person1...


In [95]:

import evaluate

# use rouge to compare the summary of original model and peft model to human/baseline one.

rouge = evaluate.load('rouge')

# evaluate original model.
original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
# evaluate the peft model.
peft_model_results = rouge.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

# output the score of ROUGE.
print('ORIGINAL MODEL:')
print(original_model_results)
print('PEFT MODEL:')
print(peft_model_results)

print("Absolute percentage improvement of PEFT MODEL over ORIGINAL MODEL")
# compute the improvement of peft.
improvement = (np.array(list(peft_model_results.values())) - np.array(list(original_model_results.values())))
for key, value in zip(peft_model_results.keys(), improvement):
    print(f'{key}: {value*100:.2f}%')


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

ORIGINAL MODEL:
{'rouge1': 0.3065619783479817, 'rouge2': 0.11267675405541684, 'rougeL': 0.2160443939546023, 'rougeLsum': 0.22696473210384666}
PEFT MODEL:
{'rouge1': 0.45906232081029863, 'rouge2': 0.17773346124919315, 'rougeL': 0.31460690575673744, 'rougeLsum': 0.3144817077528965}
Absolute percentage improvement of PEFT MODEL over ORIGINAL MODEL
rouge1: 15.25%
rouge2: 6.51%
rougeL: 9.86%
rougeLsum: 8.75%
