Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changing logging levels #222

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
9 changes: 9 additions & 0 deletions dlio_benchmark/common/enumerations.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ class StorageType(Enum):

def __str__(self):
return self.value
class LogLevel(Enum):
    """Supported logging verbosity levels for the benchmark.

    The string values are what users put in the workflow YAML
    (e.g. ``log_level: warn``); ``LogLevel("warn")`` looks the
    member up by that value.
    """

    DEBUG = "debug"
    INFO = "info"
    WARN = "warn"

    def __str__(self):
        # Render as the bare level name, e.g. "info".
        return self.value

class MetadataType(Enum):
"""
Expand Down
2 changes: 0 additions & 2 deletions dlio_benchmark/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,6 @@ def initialize(self):
- Start profiling session for Darshan and Tensorboard.
"""
self.comm.barrier()
if self.args.debug and self.args.my_rank == 0:
input("Debug mode: Press enter to start\n")

if self.args.generate_data:
if self.args.my_rank == 0:
Expand Down
15 changes: 10 additions & 5 deletions dlio_benchmark/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from dlio_benchmark.common.constants import MODULE_CONFIG
from dlio_benchmark.common.enumerations import StorageType, FormatType, Shuffle, ReadType, FileAccess, Compression, \
FrameworkType, \
FrameworkType, LogLevel, \
DataLoaderType, Profiler, DatasetType, DataLoaderSampler, CheckpointLocationType, CheckpointMechanismType
from dlio_benchmark.utils.utility import DLIOMPI, get_trace_name, utcnow
from dataclasses import dataclass
Expand Down Expand Up @@ -91,7 +91,7 @@ class ConfigArguments:
chunk_size: int = 0
compression: Compression = Compression.NONE
compression_level: int = 4
debug: bool = False
log_level: LogLevel = LogLevel.INFO
total_training_steps: int = -1
do_eval: bool = False
batch_size_eval: int = 1
Expand Down Expand Up @@ -167,7 +167,12 @@ def configure_dlio_logging(self, is_child=False):
if is_child and self.multiprocessing_context == "fork":
return
# Configure the logging library
log_level = logging.DEBUG if self.debug else logging.INFO
if self.log_level == LogLevel.DEBUG:
log_level = logging.DEBUG
elif self.log_level == LogLevel.WARN:
log_level = logging.WARN
else:
log_level = logging.INFO
logging.basicConfig(
level=log_level,
force=True,
Expand Down Expand Up @@ -558,8 +563,8 @@ def LoadConfig(args, config):
args.generate_only = True
else:
args.generate_only = False
if 'debug' in config['workflow']:
args.debug = config['workflow']['debug']
if 'log_level' in config['workflow']:
args.log_level = LogLevel(config['workflow']['log_level'])
if 'evaluation' in config['workflow']:
args.do_eval = config['workflow']['evaluation']
if 'checkpoint' in config['workflow']:
Expand Down
4 changes: 2 additions & 2 deletions dlio_benchmark/utils/statscounter.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def batch_processed(self, epoch, step, block, t0, computation_time):
else:
self.output[epoch]['proc'] = [duration]
self.output[epoch]['compute']=[computation_time]
logging.info(f"{utcnow()} Rank {self.my_rank} step {step} processed {self.batch_size} samples in {duration} s")
logging.debug(f"{utcnow()} Rank {self.my_rank} step {step} processed {self.batch_size} samples in {duration} s")

def compute_metrics_train(self, epoch, block):
key = f"block{block}"
Expand Down Expand Up @@ -358,7 +358,7 @@ def eval_batch_processed(self, epoch, step, t0, computation_time):
duration = time() - t0
self.output[epoch]['proc']['eval'].append(duration)
self.output[epoch]['compute']['eval'].append(computation_time)
logging.info(f"{utcnow()} Rank {self.my_rank} step {step} processed {self.batch_size_eval} samples in {duration} s")
logging.debug(f"{utcnow()} Rank {self.my_rank} step {step} processed {self.batch_size_eval} samples in {duration} s")
def finalize(self):
self.summary['end'] = utcnow()
def save_data(self):
Expand Down
24 changes: 12 additions & 12 deletions dlio_benchmark/utils/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,38 +37,38 @@
from dftracer.logger import dftracer as PerfTrace, dft_fn as Profile, DFTRACER_ENABLE as DFTRACER_ENABLE
except:
class Profile(object):
    """No-op stand-in for dftracer's ``dft_fn`` Profile when dftracer
    is not installed.

    Mirrors the real Profile API so call sites can decorate and trace
    unconditionally; every operation here does nothing. Decorator-style
    methods (``log``, ``log_init``, ``iter``, ``log_static``) must hand
    the wrapped function back unchanged.
    """

    def __init__(self, cat, name=None, epoch=None, step=None, image_idx=None, image_size=None):
        # Accept and ignore the same arguments the real tracer takes.
        return

    def log(self, func):
        # Decorator stub: return the function unchanged.
        return func

    def log_init(self, func):
        return func

    def iter(self, func, iter_name="step"):
        return func

    def __enter__(self):
        return

    def __exit__(self, exc_type, exc_value, exc_traceback):
        return

    def update(self, epoch=None, step=None, image_idx=None, image_size=None, args=None):
        # ``args`` defaults to None (not a mutable {}) per Python best practice;
        # it is ignored here anyway.
        return

    def flush(self):
        return

    def reset(self):
        return

    def log_static(self, func):
        # Bug fix: a decorator stub must return the wrapped function.
        # Returning None would silently replace any function decorated
        # with the fallback ``log_static`` by None.
        return func
class dftracer(object):
def __init__(self,):
self.type = None
def initialize_log(self, **kwargs):
def initialize_log(self, logfile=None, data_dir=None, process_id=-1):
return
def get_time(self):
return
def enter_event(self):
return
def exit_event(self):
return
def log_event(self, **kwargs):
def log_event(self, name, cat, start_time, duration, string_args=None):
hariharan-devarajan marked this conversation as resolved.
Show resolved Hide resolved
return
def finalize(self):
return
Expand Down
Loading